1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
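/*
 * PWR_MISC_CNTL_STATUS is not provided by the generated register headers
 * included above, so its offset and field layout are (presumably for that
 * reason) defined locally: bit 0 enables RLC CGPG and bits 2:1 report the
 * current GFXOFF status.
 */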
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
112 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
114 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
117
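/*
 * Arcturus-specific TCP channel steering registers.  They are not part of
 * the common gc_9_0 headers used by this file, so the offsets are defined
 * here and programmed via golden_settings_gc_9_4_1_arct below.
 */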
118 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
119 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
120 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
121 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
122 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
123 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
124 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
125 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
126 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
127 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
128 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
129 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
130
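/*
 * GFX sub-block identifiers used when talking to the RAS TA (trusted
 * application), e.g. for error injection.  Blocks that span several index
 * ranges (SQC, TCC, EA) keep explicit *_INDEX*_START/_END markers so a
 * sub-block plus offset can be mapped onto a single TA index.
 */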
131 enum ta_ras_gfx_subblock {
132         /*CPC*/
133         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
134         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
135         TA_RAS_BLOCK__GFX_CPC_UCODE,
136         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
137         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
138         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
139         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
140         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
141         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
142         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
143         /* CPF*/
144         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
145         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
146         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
147         TA_RAS_BLOCK__GFX_CPF_TAG,
148         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
149         /* CPG*/
150         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
151         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
152         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
153         TA_RAS_BLOCK__GFX_CPG_TAG,
154         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
155         /* GDS*/
156         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
157         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
158         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
159         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
160         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
161         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
162         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
163         /* SPI*/
164         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
165         /* SQ*/
166         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
167         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
168         TA_RAS_BLOCK__GFX_SQ_LDS_D,
169         TA_RAS_BLOCK__GFX_SQ_LDS_I,
170         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
171         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
172         /* SQC (3 ranges)*/
173         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
174         /* SQC range 0*/
175         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
176         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
177                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
178         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
179         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
180         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
181         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
182         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
183         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
184         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
185                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
186         /* SQC range 1*/
187         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
188         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
189                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
190         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
191         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
192         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
193         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
194         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
195         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
196         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
197         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
198         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
199                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
200         /* SQC range 2*/
201         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
202         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
203                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
204         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
205         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
206         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
207         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
208         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
209         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
210         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
211         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
212         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
213                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
214         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
215         /* TA*/
216         TA_RAS_BLOCK__GFX_TA_INDEX_START,
217         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
218         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
219         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
220         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
221         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
222         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
223         /* TCA*/
224         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
225         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
226         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
227         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
228         /* TCC (5 sub-ranges)*/
229         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
230         /* TCC range 0*/
231         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
232         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
233         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
234         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
235         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
236         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
237         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
238         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
239         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
240         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
241         /* TCC range 1*/
242         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
243         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
244         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
245         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
246                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
247         /* TCC range 2*/
248         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
249         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
250         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
251         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
252         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
253         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
254         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
255         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
256         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
257         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
258                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
259         /* TCC range 3*/
260         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
261         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
262         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
263         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
264                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
265         /* TCC range 4*/
266         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
267         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
268                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
269         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
270         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
271                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
272         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
273         /* TCI*/
274         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
275         /* TCP*/
276         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
277         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
278         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
279         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
280         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
281         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
282         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
283         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
284         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
285         /* TD*/
286         TA_RAS_BLOCK__GFX_TD_INDEX_START,
287         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
288         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
289         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
290         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
291         /* EA (3 sub-ranges)*/
292         TA_RAS_BLOCK__GFX_EA_INDEX_START,
293         /* EA range 0*/
294         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
295         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
296         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
297         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
298         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
299         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
300         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
301         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
302         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
303         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
304         /* EA range 1*/
305         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
306         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
307         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
308         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
309         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
310         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
311         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
312         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
313         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
314         /* EA range 2*/
315         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
316         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
317         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
318         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
319         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
320         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
321         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
322         /* UTC VM L2 bank*/
323         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
324         /* UTC VM walker*/
325         TA_RAS_BLOCK__UTC_VML2_WALKER,
326         /* UTC ATC L2 2MB cache*/
327         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
328         /* UTC ATC L2 4KB cache*/
329         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
330         TA_RAS_BLOCK__GFX_MAX
331 };
332
333 struct ras_gfx_subblock {
334         unsigned char *name;
335         int ta_subblock;
336         int hw_supported_error_type;
337         int sw_supported_error_type;
338 };
339
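/*
 * Build one ras_gfx_subblocks[] entry.  Flags a-d are packed into
 * hw_supported_error_type and e-h into sw_supported_error_type; the bit
 * positions appear to follow the RAS error-type encoding (parity,
 * single-correctable, multi-uncorrectable, poison), which lets the table
 * below stay a compact list of eight 0/1 flags per sub-block.
 */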
340 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
341         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
342                 #subblock,                                                     \
343                 TA_RAS_BLOCK__##subblock,                                      \
344                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
345                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
346         }
347
348 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
349         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
350         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
351         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
352         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
353         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
354         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
355         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
359         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
360         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
361         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
362         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
363         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
365         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
366                              0),
367         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
368                              0),
369         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
370         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
371         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
372         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
373         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
374         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
376         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
377                              0, 0),
378         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
379                              0),
380         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
381                              0, 0),
382         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
383                              0),
384         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
385                              0, 0),
386         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
387                              0),
388         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
389                              1),
390         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
391                              0, 0, 0),
392         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
393                              0),
394         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
395                              0),
396         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
397                              0),
398         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
399                              0),
400         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
401                              0),
402         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
403                              0, 0),
404         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
405                              0),
406         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
407                              0),
408         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
409                              0, 0, 0),
410         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
411                              0),
412         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
413                              0),
414         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
415                              0),
416         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
417                              0),
418         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
419                              0),
420         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
421                              0, 0),
422         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
423                              0),
424         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
425         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
426         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
427         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
428         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
430         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
432         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
433                              1),
434         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
435                              1),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
437                              1),
438         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
439                              0),
440         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
441                              0),
442         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
444         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
454                              0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
457                              0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
459                              0, 0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
461                              0),
462         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
464         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
467         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
471         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
474         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
493         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
494         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
495         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
496 };
497
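/*
 * "Golden" register settings: per-ASIC lists of (register, mask, value)
 * entries applied during init by soc15_program_register_sequence() from
 * gfx_v9_0_init_golden_registers() below, together with the common
 * golden_settings_gc_9_x_common table (skipped on Arcturus).
 */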
498 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
499 {
500         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
501         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
502         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
520 };
521
522 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
523 {
524         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
525         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
526         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
542 };
543
544 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
545 {
546         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
547         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
548         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
550         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
557 };
558
559 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
560 {
561         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
562         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
563         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
585 };
586
587 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
588 {
589         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
590         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
591         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
592         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
593         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
596 };
597
598 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
599 {
600         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
601         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
602         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
603         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
604         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
619 };
620
621 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
622 {
623         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
624         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
625         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
626 };
627
628 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
629 {
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
638         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
639         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
640         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
641         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
642         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
643         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
645         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
646 };
647
648 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
649 {
650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
663 };
664
665 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
666 {
667         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
668         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
675 };
676
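/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL address/data register pairs
 * relative to the first pair, so the RLC save/restore list setup can
 * program them in a simple loop.
 */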
677 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
678 {
679         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
680         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
681         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
682         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
683         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
684         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
685         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
686         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
687 };
688
689 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
690 {
691         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
692         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
693         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
694         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
695         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
696         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
697         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
698         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
699 };
700
701 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
702 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
703 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
704 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
705
706 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
707 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
708 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
709 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
710 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
711                                  struct amdgpu_cu_info *cu_info);
712 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
713 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
714 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
715 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
716 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
717                                           void *ras_error_status);
718 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
719                                      void *inject_if);
720
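/* Apply the per-ASIC golden register settings defined above. */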
721 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
722 {
723         switch (adev->asic_type) {
724         case CHIP_VEGA10:
725                 soc15_program_register_sequence(adev,
726                                                 golden_settings_gc_9_0,
727                                                 ARRAY_SIZE(golden_settings_gc_9_0));
728                 soc15_program_register_sequence(adev,
729                                                 golden_settings_gc_9_0_vg10,
730                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
731                 break;
732         case CHIP_VEGA12:
733                 soc15_program_register_sequence(adev,
734                                                 golden_settings_gc_9_2_1,
735                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
736                 soc15_program_register_sequence(adev,
737                                                 golden_settings_gc_9_2_1_vg12,
738                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
739                 break;
740         case CHIP_VEGA20:
741                 soc15_program_register_sequence(adev,
742                                                 golden_settings_gc_9_0,
743                                                 ARRAY_SIZE(golden_settings_gc_9_0));
744                 soc15_program_register_sequence(adev,
745                                                 golden_settings_gc_9_0_vg20,
746                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
747                 break;
748         case CHIP_ARCTURUS:
749                 soc15_program_register_sequence(adev,
750                                                 golden_settings_gc_9_4_1_arct,
751                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
752                 break;
753         case CHIP_RAVEN:
754                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
755                                                 ARRAY_SIZE(golden_settings_gc_9_1));
756                 if (adev->rev_id >= 8)
757                         soc15_program_register_sequence(adev,
758                                                         golden_settings_gc_9_1_rv2,
759                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
760                 else
761                         soc15_program_register_sequence(adev,
762                                                         golden_settings_gc_9_1_rv1,
763                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
764                 break;
765         default:
766                 break;
767         }
768
769         if (adev->asic_type != CHIP_ARCTURUS)
770                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
771                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
772 }
773
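/*
 * Expose the first eight SCRATCH_REG* registers through the generic
 * amdgpu scratch-register allocator; the ring test below grabs one of
 * them to check that the CP can write registers.
 */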
774 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
775 {
776         adev->gfx.scratch.num_reg = 8;
777         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
778         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
779 }
780
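/*
 * Emit a PACKET3_WRITE_DATA that writes @val into register @reg from the
 * selected engine; @wc asks the CP for a write confirmation.
 */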
781 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
782                                        bool wc, uint32_t reg, uint32_t val)
783 {
784         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
785         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
786                                 WRITE_DATA_DST_SEL(0) |
787                                 (wc ? WR_CONFIRM : 0));
788         amdgpu_ring_write(ring, reg);
789         amdgpu_ring_write(ring, 0);
790         amdgpu_ring_write(ring, val);
791 }
792
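/*
 * Emit a PACKET3_WAIT_REG_MEM that stalls the selected engine until the
 * polled register (mem_space == 0) or memory dword at addr0/addr1
 * (mem_space == 1) matches @ref under @mask, re-polling every @inv
 * interval.
 */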
793 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
794                                   int mem_space, int opt, uint32_t addr0,
795                                   uint32_t addr1, uint32_t ref, uint32_t mask,
796                                   uint32_t inv)
797 {
798         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
799         amdgpu_ring_write(ring,
800                                  /* memory (1) or register (0) */
801                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
802                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
803                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
804                                  WAIT_REG_MEM_ENGINE(eng_sel)));
805
806         if (mem_space)
807                 BUG_ON(addr0 & 0x3); /* Dword align */
808         amdgpu_ring_write(ring, addr0);
809         amdgpu_ring_write(ring, addr1);
810         amdgpu_ring_write(ring, ref);
811         amdgpu_ring_write(ring, mask);
812         amdgpu_ring_write(ring, inv); /* poll interval */
813 }
814
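/*
 * Basic ring test: push a SET_UCONFIG_REG write of 0xDEADBEEF to a
 * scratch register through the ring and poll until the value shows up or
 * the usec timeout expires.
 */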
815 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
816 {
817         struct amdgpu_device *adev = ring->adev;
818         uint32_t scratch;
819         uint32_t tmp = 0;
820         unsigned i;
821         int r;
822
823         r = amdgpu_gfx_scratch_get(adev, &scratch);
824         if (r)
825                 return r;
826
827         WREG32(scratch, 0xCAFEDEAD);
828         r = amdgpu_ring_alloc(ring, 3);
829         if (r)
830                 goto error_free_scratch;
831
832         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
833         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
834         amdgpu_ring_write(ring, 0xDEADBEEF);
835         amdgpu_ring_commit(ring);
836
837         for (i = 0; i < adev->usec_timeout; i++) {
838                 tmp = RREG32(scratch);
839                 if (tmp == 0xDEADBEEF)
840                         break;
841                 udelay(1);
842         }
843
844         if (i >= adev->usec_timeout)
845                 r = -ETIMEDOUT;
846
847 error_free_scratch:
848         amdgpu_gfx_scratch_free(adev, scratch);
849         return r;
850 }
851
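/*
 * Indirect buffer test: submit a small IB containing a WRITE_DATA to a
 * writeback slot, wait on its fence and verify that 0xDEADBEEF landed in
 * memory.
 */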
852 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
853 {
854         struct amdgpu_device *adev = ring->adev;
855         struct amdgpu_ib ib;
856         struct dma_fence *f = NULL;
857
858         unsigned index;
859         uint64_t gpu_addr;
860         uint32_t tmp;
861         long r;
862
863         r = amdgpu_device_wb_get(adev, &index);
864         if (r)
865                 return r;
866
867         gpu_addr = adev->wb.gpu_addr + (index * 4);
868         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
869         memset(&ib, 0, sizeof(ib));
870         r = amdgpu_ib_get(adev, NULL, 16, &ib);
871         if (r)
872                 goto err1;
873
874         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
875         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
876         ib.ptr[2] = lower_32_bits(gpu_addr);
877         ib.ptr[3] = upper_32_bits(gpu_addr);
878         ib.ptr[4] = 0xDEADBEEF;
879         ib.length_dw = 5;
880
881         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
882         if (r)
883                 goto err2;
884
885         r = dma_fence_wait_timeout(f, false, timeout);
886         if (r == 0) {
887                 r = -ETIMEDOUT;
888                 goto err2;
889         } else if (r < 0) {
890                 goto err2;
891         }
892
893         tmp = adev->wb.wb[index];
894         if (tmp == 0xDEADBEEF)
895                 r = 0;
896         else
897                 r = -EINVAL;
898
899 err2:
900         amdgpu_ib_free(adev, &ib, NULL);
901         dma_fence_put(f);
902 err1:
903         amdgpu_device_wb_free(adev, index);
904         return r;
905 }
906
907
908 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
909 {
910         release_firmware(adev->gfx.pfp_fw);
911         adev->gfx.pfp_fw = NULL;
912         release_firmware(adev->gfx.me_fw);
913         adev->gfx.me_fw = NULL;
914         release_firmware(adev->gfx.ce_fw);
915         adev->gfx.ce_fw = NULL;
916         release_firmware(adev->gfx.rlc_fw);
917         adev->gfx.rlc_fw = NULL;
918         release_firmware(adev->gfx.mec_fw);
919         adev->gfx.mec_fw = NULL;
920         release_firmware(adev->gfx.mec2_fw);
921         adev->gfx.mec2_fw = NULL;
922
923         kfree(adev->gfx.rlc.register_list_format);
924 }
925
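/*
 * Parse the v2.1 RLC firmware header and record the save/restore list
 * CNTL/GPM/SRM sub-images (versions, sizes and offsets into the blob) for
 * later upload.
 */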
926 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
927 {
928         const struct rlc_firmware_header_v2_1 *rlc_hdr;
929
930         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
931         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
932         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
933         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
934         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
935         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
936         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
937         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
938         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
939         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
940         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
941         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
942         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
943         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
944                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
945 }
946
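/*
 * Flag whether the loaded ME/PFP and MEC firmware are recent enough for
 * the firmware-assisted register write-then-wait path; older firmware
 * presumably has to fall back to separate write and wait packets.
 */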
947 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
948 {
949         adev->gfx.me_fw_write_wait = false;
950         adev->gfx.mec_fw_write_wait = false;
951
952         switch (adev->asic_type) {
953         case CHIP_VEGA10:
954                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
955                     (adev->gfx.me_feature_version >= 42) &&
956                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
957                     (adev->gfx.pfp_feature_version >= 42))
958                         adev->gfx.me_fw_write_wait = true;
959
960                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
961                     (adev->gfx.mec_feature_version >= 42))
962                         adev->gfx.mec_fw_write_wait = true;
963                 break;
964         case CHIP_VEGA12:
965                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
966                     (adev->gfx.me_feature_version >= 44) &&
967                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
968                     (adev->gfx.pfp_feature_version >= 44))
969                         adev->gfx.me_fw_write_wait = true;
970
971                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
972                     (adev->gfx.mec_feature_version >= 44))
973                         adev->gfx.mec_fw_write_wait = true;
974                 break;
975         case CHIP_VEGA20:
976                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
977                     (adev->gfx.me_feature_version >= 44) &&
978                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
979                     (adev->gfx.pfp_feature_version >= 44))
980                         adev->gfx.me_fw_write_wait = true;
981
982                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
983                     (adev->gfx.mec_feature_version >= 44))
984                         adev->gfx.mec_fw_write_wait = true;
985                 break;
986         case CHIP_RAVEN:
987                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
988                     (adev->gfx.me_feature_version >= 42) &&
989                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
990                     (adev->gfx.pfp_feature_version >= 42))
991                         adev->gfx.me_fw_write_wait = true;
992
993                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
994                     (adev->gfx.mec_feature_version >= 42))
995                         adev->gfx.mec_fw_write_wait = true;
996                 break;
997         default:
998                 break;
999         }
1000 }
1001
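/*
 * GFXOFF on Raven requires a sufficiently new (and not known-bad) RLC
 * firmware; clear the PP_GFXOFF feature bit when the loaded RLC ucode
 * cannot support it.
 */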
1002 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1003 {
1004         switch (adev->asic_type) {
1005         case CHIP_VEGA10:
1006         case CHIP_VEGA12:
1007         case CHIP_VEGA20:
1008                 break;
1009         case CHIP_RAVEN:
1010                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1011                         break;
1012                 if ((adev->gfx.rlc_fw_version != 106 &&
1013                      adev->gfx.rlc_fw_version < 531) ||
1014                     (adev->gfx.rlc_fw_version == 53815) ||
1015                     (adev->gfx.rlc_feature_version < 1) ||
1016                     !adev->gfx.rlc.is_rlc_v2_1)
1017                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1018                 break;
1019         default:
1020                 break;
1021         }
1022 }
1023
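/*
 * Fetch and validate the PFP/ME/CE microcode for @chip_name, record the
 * ucode and feature versions, and, when firmware is loaded through the
 * PSP, register each image in adev->firmware.ucode[] so it gets uploaded.
 */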
1024 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1025                                           const char *chip_name)
1026 {
1027         char fw_name[30];
1028         int err;
1029         struct amdgpu_firmware_info *info = NULL;
1030         const struct common_firmware_header *header = NULL;
1031         const struct gfx_firmware_header_v1_0 *cp_hdr;
1032
1033         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1034         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1035         if (err)
1036                 goto out;
1037         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1038         if (err)
1039                 goto out;
1040         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1041         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1043
1044         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1045         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1046         if (err)
1047                 goto out;
1048         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1049         if (err)
1050                 goto out;
1051         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1052         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1053         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1054
1055         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1056         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1057         if (err)
1058                 goto out;
1059         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1060         if (err)
1061                 goto out;
1062         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1063         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1064         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1065
1066         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1067                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1068                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1069                 info->fw = adev->gfx.pfp_fw;
1070                 header = (const struct common_firmware_header *)info->fw->data;
1071                 adev->firmware.fw_size +=
1072                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1073
1074                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1075                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1076                 info->fw = adev->gfx.me_fw;
1077                 header = (const struct common_firmware_header *)info->fw->data;
1078                 adev->firmware.fw_size +=
1079                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1080
1081                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1082                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1083                 info->fw = adev->gfx.ce_fw;
1084                 header = (const struct common_firmware_header *)info->fw->data;
1085                 adev->firmware.fw_size +=
1086                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1087         }
1088
1089 out:
1090         if (err) {
1091                 dev_err(adev->dev,
1092                         "gfx9: Failed to load firmware \"%s\"\n",
1093                         fw_name);
1094                 release_firmware(adev->gfx.pfp_fw);
1095                 adev->gfx.pfp_fw = NULL;
1096                 release_firmware(adev->gfx.me_fw);
1097                 adev->gfx.me_fw = NULL;
1098                 release_firmware(adev->gfx.ce_fw);
1099                 adev->gfx.ce_fw = NULL;
1100         }
1101         return err;
1102 }
1103
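/*
 * Fetch the RLC microcode.  Picasso AM4 boards and Raven systems with a
 * sufficiently new SMU get dedicated images; everything else uses
 * <chip>_rlc.bin.  The header is then parsed into the rlc bookkeeping
 * fields and the save/restore register lists are copied out.
 */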
1104 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1105                                           const char *chip_name)
1106 {
1107         char fw_name[30];
1108         int err;
1109         struct amdgpu_firmware_info *info = NULL;
1110         const struct common_firmware_header *header = NULL;
1111         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1112         unsigned int *tmp = NULL;
1113         unsigned int i = 0;
1114         uint16_t version_major;
1115         uint16_t version_minor;
1116         uint32_t smu_version;
1117
1118         /*
1119          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1120          * instead of picasso_rlc.bin.
1121          * Detection: PCO AM4 has PCI revision 0xC8..0xCF
1122          *            or 0xD8..0xDF;
1123          * any other revision is PCO FP5.
1124          */
1126         if (!strcmp(chip_name, "picasso") &&
1127                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1128                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1129                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1130         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1131                 (smu_version >= 0x41e2b))
1132                 /* SMC is loaded by the SBIOS on APUs, so the SMU
1133                  * version can be queried directly.
1134                  */
1135                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1136         else
1137                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1138         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1139         if (err)
1140                 goto out;
1141         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
         if (err)
                 goto out;
1142         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1143
1144         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1145         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1146         if (version_major == 2 && version_minor == 1)
1147                 adev->gfx.rlc.is_rlc_v2_1 = true;
1148
1149         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1150         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1151         adev->gfx.rlc.save_and_restore_offset =
1152                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1153         adev->gfx.rlc.clear_state_descriptor_offset =
1154                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1155         adev->gfx.rlc.avail_scratch_ram_locations =
1156                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1157         adev->gfx.rlc.reg_restore_list_size =
1158                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1159         adev->gfx.rlc.reg_list_format_start =
1160                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1161         adev->gfx.rlc.reg_list_format_separate_start =
1162                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1163         adev->gfx.rlc.starting_offsets_start =
1164                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1165         adev->gfx.rlc.reg_list_format_size_bytes =
1166                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1167         adev->gfx.rlc.reg_list_size_bytes =
1168                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1169         adev->gfx.rlc.register_list_format =
1170                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1171                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1172         if (!adev->gfx.rlc.register_list_format) {
1173                 err = -ENOMEM;
1174                 goto out;
1175         }
1176
1177         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1178                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1179         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1180                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1181
1182         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1183
1184         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1185                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1186         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1187                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1188
1189         if (adev->gfx.rlc.is_rlc_v2_1)
1190                 gfx_v9_0_init_rlc_ext_microcode(adev);
1191
1192         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1193                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1194                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1195                 info->fw = adev->gfx.rlc_fw;
1196                 header = (const struct common_firmware_header *)info->fw->data;
1197                 adev->firmware.fw_size +=
1198                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1201                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1202                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1203                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1204                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1205                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1206                         info->fw = adev->gfx.rlc_fw;
1207                         adev->firmware.fw_size +=
1208                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1209
1210                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1211                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1212                         info->fw = adev->gfx.rlc_fw;
1213                         adev->firmware.fw_size +=
1214                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1215
1216                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1217                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1218                         info->fw = adev->gfx.rlc_fw;
1219                         adev->firmware.fw_size +=
1220                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1221                 }
1222         }
1223
1224 out:
1225         if (err) {
1226                 dev_err(adev->dev,
1227                         "gfx9: Failed to load firmware \"%s\"\n",
1228                         fw_name);
1229                 release_firmware(adev->gfx.rlc_fw);
1230                 adev->gfx.rlc_fw = NULL;
1231         }
1232         return err;
1233 }
1234
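/*
 * Fetch the compute (MEC) microcode.  MEC2 firmware is optional: when
 * <chip>_mec2.bin is absent the error is ignored and only MEC1 is used.
 * For PSP loading the jump table is split out of the main image and
 * registered as its own ucode entry.
 */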
1235 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1236                                           const char *chip_name)
1237 {
1238         char fw_name[30];
1239         int err;
1240         struct amdgpu_firmware_info *info = NULL;
1241         const struct common_firmware_header *header = NULL;
1242         const struct gfx_firmware_header_v1_0 *cp_hdr;
1243
1244         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1245         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1246         if (err)
1247                 goto out;
1248         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1249         if (err)
1250                 goto out;
1251         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1252         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1253         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1254
1255
1256         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1257         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1258         if (!err) {
1259                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1260                 if (err)
1261                         goto out;
1262                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1263                         adev->gfx.mec2_fw->data;
1264                 adev->gfx.mec2_fw_version =
1265                         le32_to_cpu(cp_hdr->header.ucode_version);
1266                 adev->gfx.mec2_feature_version =
1267                         le32_to_cpu(cp_hdr->ucode_feature_version);
1268         } else {
1269                 err = 0;
1270                 adev->gfx.mec2_fw = NULL;
1271         }
1272
1273         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1274                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1275                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1276                 info->fw = adev->gfx.mec_fw;
1277                 header = (const struct common_firmware_header *)info->fw->data;
1278                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1279                 adev->firmware.fw_size +=
1280                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1281
1282                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1283                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1284                 info->fw = adev->gfx.mec_fw;
1285                 adev->firmware.fw_size +=
1286                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1287
1288                 if (adev->gfx.mec2_fw) {
1289                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1290                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1291                         info->fw = adev->gfx.mec2_fw;
1292                         header = (const struct common_firmware_header *)info->fw->data;
1293                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1294                         adev->firmware.fw_size +=
1295                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1296
1297                         /* TODO: Determine if MEC2 JT FW loading can be removed
1298                          * for all GFX V9 and later ASICs */
1299                         if (adev->asic_type != CHIP_ARCTURUS) {
1300                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1301                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1302                                 info->fw = adev->gfx.mec2_fw;
1303                                 adev->firmware.fw_size +=
1304                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1305                                         PAGE_SIZE);
1306                         }
1307                 }
1308         }
1309
1310 out:
1311         gfx_v9_0_check_if_need_gfxoff(adev);
1312         gfx_v9_0_check_fw_write_wait(adev);
1313         if (err) {
1314                 dev_err(adev->dev,
1315                         "gfx9: Failed to load firmware \"%s\"\n",
1316                         fw_name);
1317                 release_firmware(adev->gfx.mec_fw);
1318                 adev->gfx.mec_fw = NULL;
1319                 release_firmware(adev->gfx.mec2_fw);
1320                 adev->gfx.mec2_fw = NULL;
1321         }
1322         return err;
1323 }
1324
1325 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1326 {
1327         const char *chip_name;
1328         int r;
1329
1330         DRM_DEBUG("\n");
1331
1332         switch (adev->asic_type) {
1333         case CHIP_VEGA10:
1334                 chip_name = "vega10";
1335                 break;
1336         case CHIP_VEGA12:
1337                 chip_name = "vega12";
1338                 break;
1339         case CHIP_VEGA20:
1340                 chip_name = "vega20";
1341                 break;
1342         case CHIP_RAVEN:
1343                 if (adev->rev_id >= 8)
1344                         chip_name = "raven2";
1345                 else if (adev->pdev->device == 0x15d8)
1346                         chip_name = "picasso";
1347                 else
1348                         chip_name = "raven";
1349                 break;
1350         case CHIP_ARCTURUS:
1351                 chip_name = "arcturus";
1352                 break;
1353         case CHIP_RENOIR:
1354                 chip_name = "renoir";
1355                 break;
1356         default:
1357                 BUG();
1358         }
1359
1360         /* No CPG in Arcturus */
1361         if (adev->asic_type != CHIP_ARCTURUS) {
1362                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1363                 if (r)
1364                         return r;
1365         }
1366
1367         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1368         if (r)
1369                 return r;
1370
1371         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1372         if (r)
1373                 return r;
1374
1375         return r;
1376 }
1377
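/*
 * Size of the clear-state indirect buffer in dwords: the preamble and
 * context-control packets, one SET_CONTEXT_REG packet per extent in the
 * gfx9 clear-state table, and the trailing end-of-clear-state and
 * CLEAR_STATE packets.
 */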
1378 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1379 {
1380         u32 count = 0;
1381         const struct cs_section_def *sect = NULL;
1382         const struct cs_extent_def *ext = NULL;
1383
1384         /* begin clear state */
1385         count += 2;
1386         /* context control state */
1387         count += 3;
1388
1389         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1390                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1391                         if (sect->id == SECT_CONTEXT)
1392                                 count += 2 + ext->reg_count;
1393                         else
1394                                 return 0;
1395                 }
1396         }
1397
1398         /* end clear state */
1399         count += 2;
1400         /* clear state */
1401         count += 2;
1402
1403         return count;
1404 }
1405
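/*
 * Emit the clear-state buffer contents sized by gfx_v9_0_get_csb_size():
 * PREAMBLE/CONTEXT_CONTROL packets, the SECT_CONTEXT register extents
 * from the rlc cs_data table, and the closing CLEAR_STATE packet, all as
 * little-endian dwords.
 */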
1406 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1407                                     volatile u32 *buffer)
1408 {
1409         u32 count = 0, i;
1410         const struct cs_section_def *sect = NULL;
1411         const struct cs_extent_def *ext = NULL;
1412
1413         if (adev->gfx.rlc.cs_data == NULL)
1414                 return;
1415         if (buffer == NULL)
1416                 return;
1417
1418         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1419         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1420
1421         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1422         buffer[count++] = cpu_to_le32(0x80000000);
1423         buffer[count++] = cpu_to_le32(0x80000000);
1424
1425         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1426                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1427                         if (sect->id == SECT_CONTEXT) {
1428                                 buffer[count++] =
1429                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1430                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1431                                                 PACKET3_SET_CONTEXT_REG_START);
1432                                 for (i = 0; i < ext->reg_count; i++)
1433                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1434                         } else {
1435                                 return;
1436                         }
1437                 }
1438         }
1439
1440         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1441         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1442
1443         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1444         buffer[count++] = cpu_to_le32(0);
1445 }
1446
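/*
 * Build the per-SE/SH "always on" CU bitmaps used by RLC load balancing
 * and power gating.  APUs keep 4 CUs always on, Vega12 keeps 8, the
 * remaining ASICs keep 12; the mask of the first two enabled CUs is also
 * written to RLC_PG_ALWAYS_ON_CU_MASK.
 */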
1447 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1448 {
1449         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1450         uint32_t pg_always_on_cu_num = 2;
1451         uint32_t always_on_cu_num;
1452         uint32_t i, j, k;
1453         uint32_t mask, cu_bitmap, counter;
1454
1455         if (adev->flags & AMD_IS_APU)
1456                 always_on_cu_num = 4;
1457         else if (adev->asic_type == CHIP_VEGA12)
1458                 always_on_cu_num = 8;
1459         else
1460                 always_on_cu_num = 12;
1461
1462         mutex_lock(&adev->grbm_idx_mutex);
1463         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1464                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1465                         mask = 1;
1466                         cu_bitmap = 0;
1467                         counter = 0;
1468                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1469
1470                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1471                                 if (cu_info->bitmap[i][j] & mask) {
1472                                         if (counter == pg_always_on_cu_num)
1473                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1474                                         if (counter < always_on_cu_num)
1475                                                 cu_bitmap |= mask;
1476                                         else
1477                                                 break;
1478                                         counter++;
1479                                 }
1480                                 mask <<= 1;
1481                         }
1482
1483                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1484                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1485                 }
1486         }
1487         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1488         mutex_unlock(&adev->grbm_idx_mutex);
1489 }
1490
1491 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1492 {
1493         uint32_t data;
1494
1495         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1496         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1497         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1498         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1499         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1500
1501         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1502         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1503
1504         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1505         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1506
1507         mutex_lock(&adev->grbm_idx_mutex);
1508         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1509         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1510         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1511
1512         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1513         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1514         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1515         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1516         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1517
1518         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1519         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1520         data &= 0x0000FFFF;
1521         data |= 0x00C00000;
1522         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1523
1524         /*
1525          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1526          * programmed in gfx_v9_0_init_always_on_cu_mask()
1527          */
1528
1529         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1530          * but is used here as part of the RLC_LB_CNTL configuration */
1531         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1532         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1533         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1534         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1535         mutex_unlock(&adev->grbm_idx_mutex);
1536
1537         gfx_v9_0_init_always_on_cu_mask(adev);
1538 }
1539
1540 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1541 {
1542         uint32_t data;
1543
1544         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1545         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1546         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1547         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1548         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1549
1550         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1551         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1552
1553         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1554         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1555
1556         mutex_lock(&adev->grbm_idx_mutex);
1557         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1558         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1559         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1560
1561         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1562         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1563         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1564         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1565         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1566
1567         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1568         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1569         data &= 0x0000FFFF;
1570         data |= 0x00C00000;
1571         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1572
1573         /*
1574          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1575          * programmed in gfx_v9_0_init_always_on_cu_mask()
1576          */
1577
1578         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1579          * but is used here as part of the RLC_LB_CNTL configuration */
1580         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1581         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1582         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1583         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1584         mutex_unlock(&adev->grbm_idx_mutex);
1585
1586         gfx_v9_0_init_always_on_cu_mask(adev);
1587 }
1588
1589 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1590 {
1591         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1592 }
1593
1594 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1595 {
1596         return 5;
1597 }
1598
1599 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1600 {
1601         const struct cs_section_def *cs_data;
1602         int r;
1603
1604         adev->gfx.rlc.cs_data = gfx9_cs_data;
1605
1606         cs_data = adev->gfx.rlc.cs_data;
1607
1608         if (cs_data) {
1609                 /* init clear state block */
1610                 r = amdgpu_gfx_rlc_init_csb(adev);
1611                 if (r)
1612                         return r;
1613         }
1614
1615         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1616                 /* TODO: double check the cp_table_size for RV */
1617                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1618                 r = amdgpu_gfx_rlc_init_cpt(adev);
1619                 if (r)
1620                         return r;
1621         }
1622
1623         switch (adev->asic_type) {
1624         case CHIP_RAVEN:
1625                 gfx_v9_0_init_lbpw(adev);
1626                 break;
1627         case CHIP_VEGA20:
1628                 gfx_v9_4_init_lbpw(adev);
1629                 break;
1630         default:
1631                 break;
1632         }
1633
1634         return 0;
1635 }
1636
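/*
 * Pin the clear-state buffer object in VRAM and cache its GPU address;
 * gfx_v9_0_csb_vram_unpin() below releases the pin again.
 */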
1637 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1638 {
1639         int r;
1640
1641         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1642         if (unlikely(r != 0))
1643                 return r;
1644
1645         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1646                         AMDGPU_GEM_DOMAIN_VRAM);
1647         if (!r)
1648                 adev->gfx.rlc.clear_state_gpu_addr =
1649                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1650
1651         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1652
1653         return r;
1654 }
1655
1656 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1657 {
1658         int r;
1659
1660         if (!adev->gfx.rlc.clear_state_obj)
1661                 return;
1662
1663         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1664         if (likely(r == 0)) {
1665                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1666                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1667         }
1668 }
1669
1670 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1671 {
1672         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1673         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1674 }
1675
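/*
 * Allocate the MEC objects: a VRAM buffer holding one HPD/EOP area per
 * enabled compute ring, and a GTT buffer that receives a copy of the
 * MEC firmware image.
 */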
1676 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1677 {
1678         int r;
1679         u32 *hpd;
1680         const __le32 *fw_data;
1681         unsigned fw_size;
1682         u32 *fw;
1683         size_t mec_hpd_size;
1684
1685         const struct gfx_firmware_header_v1_0 *mec_hdr;
1686
1687         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1688
1689         /* take ownership of the relevant compute queues */
1690         amdgpu_gfx_compute_queue_acquire(adev);
1691         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1692
1693         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1694                                       AMDGPU_GEM_DOMAIN_VRAM,
1695                                       &adev->gfx.mec.hpd_eop_obj,
1696                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1697                                       (void **)&hpd);
1698         if (r) {
1699                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1700                 gfx_v9_0_mec_fini(adev);
1701                 return r;
1702         }
1703
1704         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1705
1706         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1707         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1708
1709         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1710
1711         fw_data = (const __le32 *)
1712                 (adev->gfx.mec_fw->data +
1713                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1714         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1715
1716         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1717                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1718                                       &adev->gfx.mec.mec_fw_obj,
1719                                       &adev->gfx.mec.mec_fw_gpu_addr,
1720                                       (void **)&fw);
1721         if (r) {
1722                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1723                 gfx_v9_0_mec_fini(adev);
1724                 return r;
1725         }
1726
1727         memcpy(fw, fw_data, fw_size);
1728
1729         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1730         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1731
1732         return 0;
1733 }
1734
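/*
 * Wave debug helpers: program SQ_IND_INDEX with the SIMD/wave/register
 * selection and read the value(s) back through SQ_IND_DATA, optionally
 * auto-incrementing for multi-register reads.
 */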
1735 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1736 {
1737         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1738                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1739                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1740                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1741                 (SQ_IND_INDEX__FORCE_READ_MASK));
1742         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1743 }
1744
1745 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1746                            uint32_t wave, uint32_t thread,
1747                            uint32_t regno, uint32_t num, uint32_t *out)
1748 {
1749         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1750                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1751                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1752                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1753                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1754                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1755                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1756         while (num--)
1757                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1758 }
1759
1760 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1761 {
1762         /* type 1 wave data */
1763         dst[(*no_fields)++] = 1;
1764         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1765         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1766         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1767         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1768         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1769         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1770         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1771         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1772         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1773         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1774         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1775         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1776         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1777         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1778 }
1779
1780 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1781                                      uint32_t wave, uint32_t start,
1782                                      uint32_t size, uint32_t *dst)
1783 {
1784         wave_read_regs(
1785                 adev, simd, wave, 0,
1786                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1787 }
1788
1789 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1790                                      uint32_t wave, uint32_t thread,
1791                                      uint32_t start, uint32_t size,
1792                                      uint32_t *dst)
1793 {
1794         wave_read_regs(
1795                 adev, simd, wave, thread,
1796                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1797 }
1798
1799 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1800                                   u32 me, u32 pipe, u32 q, u32 vm)
1801 {
1802         soc15_grbm_select(adev, me, pipe, q, vm);
1803 }
1804
1805 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1806         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1807         .select_se_sh = &gfx_v9_0_select_se_sh,
1808         .read_wave_data = &gfx_v9_0_read_wave_data,
1809         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1810         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1811         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1812         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1813         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1814 };
1815
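/*
 * Per-ASIC gfx configuration: install the gfx callbacks, fill in the
 * fixed FIFO sizes and context counts, pick (or read back and patch)
 * GB_ADDR_CONFIG, and derive the pipe/bank/RB/SE counts from its
 * bit-fields.
 */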
1816 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818         u32 gb_addr_config;
1819         int err;
1820
1821         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1822
1823         switch (adev->asic_type) {
1824         case CHIP_VEGA10:
1825                 adev->gfx.config.max_hw_contexts = 8;
1826                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1827                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1828                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1829                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1830                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1831                 break;
1832         case CHIP_VEGA12:
1833                 adev->gfx.config.max_hw_contexts = 8;
1834                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1835                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1836                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1837                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1838                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1839                 DRM_INFO("fix gfx.config for vega12\n");
1840                 break;
1841         case CHIP_VEGA20:
1842                 adev->gfx.config.max_hw_contexts = 8;
1843                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1844                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1845                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1846                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1847                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1848                 gb_addr_config &= ~0xf3e777ff;
1849                 gb_addr_config |= 0x22014042;
1850                 /* check vbios table if gpu info is not available */
1851                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1852                 if (err)
1853                         return err;
1854                 break;
1855         case CHIP_RAVEN:
1856                 adev->gfx.config.max_hw_contexts = 8;
1857                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1858                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1859                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1860                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1861                 if (adev->rev_id >= 8)
1862                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1863                 else
1864                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1865                 break;
1866         case CHIP_ARCTURUS:
1867                 adev->gfx.config.max_hw_contexts = 8;
1868                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1872                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1873                 gb_addr_config &= ~0xf3e777ff;
1874                 gb_addr_config |= 0x22014042;
1875                 break;
1876         case CHIP_RENOIR:
1877                 adev->gfx.config.max_hw_contexts = 8;
1878                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1879                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1880                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1881                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1882                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1883                 gb_addr_config &= ~0xf3e777ff;
1884                 gb_addr_config |= 0x22010042;
1885                 break;
1886         default:
1887                 BUG();
1888                 break;
1889         }
1890
1891         adev->gfx.config.gb_addr_config = gb_addr_config;
1892
1893         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1894                         REG_GET_FIELD(
1895                                         adev->gfx.config.gb_addr_config,
1896                                         GB_ADDR_CONFIG,
1897                                         NUM_PIPES);
1898
1899         adev->gfx.config.max_tile_pipes =
1900                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1901
1902         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1903                         REG_GET_FIELD(
1904                                         adev->gfx.config.gb_addr_config,
1905                                         GB_ADDR_CONFIG,
1906                                         NUM_BANKS);
1907         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1908                         REG_GET_FIELD(
1909                                         adev->gfx.config.gb_addr_config,
1910                                         GB_ADDR_CONFIG,
1911                                         MAX_COMPRESSED_FRAGS);
1912         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1913                         REG_GET_FIELD(
1914                                         adev->gfx.config.gb_addr_config,
1915                                         GB_ADDR_CONFIG,
1916                                         NUM_RB_PER_SE);
1917         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1918                         REG_GET_FIELD(
1919                                         adev->gfx.config.gb_addr_config,
1920                                         GB_ADDR_CONFIG,
1921                                         NUM_SHADER_ENGINES);
1922         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1923                         REG_GET_FIELD(
1924                                         adev->gfx.config.gb_addr_config,
1925                                         GB_ADDR_CONFIG,
1926                                         PIPE_INTERLEAVE_SIZE));
1927
1928         return 0;
1929 }
1930
1931 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1932                                    struct amdgpu_ngg_buf *ngg_buf,
1933                                    int size_se,
1934                                    int default_size_se)
1935 {
1936         int r;
1937
1938         if (size_se < 0) {
1939                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1940                 return -EINVAL;
1941         }
1942         size_se = size_se ? size_se : default_size_se;
1943
1944         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1945         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1946                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1947                                     &ngg_buf->bo,
1948                                     &ngg_buf->gpu_addr,
1949                                     NULL);
1950         if (r) {
1951                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1952                 return r;
1953         }
1954         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1955
1956         return r;
1957 }
1958
1959 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1960 {
1961         int i;
1962
1963         for (i = 0; i < NGG_BUF_MAX; i++)
1964                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1965                                       &adev->gfx.ngg.buf[i].gpu_addr,
1966                                       NULL);
1967
1968         memset(&adev->gfx.ngg.buf[0], 0,
1969                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1970
1971         adev->gfx.ngg.init = false;
1972
1973         return 0;
1974 }
1975
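/*
 * Set up the NGG (next-generation geometry) buffers when the amdgpu_ngg
 * option is enabled: reserve a small GDS window and create the per-SE
 * primitive, position and control-sideband buffers, plus an optional
 * parameter cache.
 */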
1976 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1977 {
1978         int r;
1979
1980         if (!amdgpu_ngg || adev->gfx.ngg.init)
1981                 return 0;
1982
1983         /* GDS reserve memory: 64 bytes alignment */
1984         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1985         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1986         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1987         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1988
1989         /* Primitive Buffer */
1990         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1991                                     amdgpu_prim_buf_per_se,
1992                                     64 * 1024);
1993         if (r) {
1994                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1995                 goto err;
1996         }
1997
1998         /* Position Buffer */
1999         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2000                                     amdgpu_pos_buf_per_se,
2001                                     256 * 1024);
2002         if (r) {
2003                 dev_err(adev->dev, "Failed to create Position Buffer\n");
2004                 goto err;
2005         }
2006
2007         /* Control Sideband */
2008         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2009                                     amdgpu_cntl_sb_buf_per_se,
2010                                     256);
2011         if (r) {
2012                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2013                 goto err;
2014         }
2015
2016         /* Parameter Cache, not created by default */
2017         if (amdgpu_param_buf_per_se <= 0)
2018                 goto out;
2019
2020         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2021                                     amdgpu_param_buf_per_se,
2022                                     512 * 1024);
2023         if (r) {
2024                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2025                 goto err;
2026         }
2027
2028 out:
2029         adev->gfx.ngg.init = true;
2030         return 0;
2031 err:
2032         gfx_v9_0_ngg_fini(adev);
2033         return r;
2034 }
2035
2036 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2037 {
2038         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2039         int r;
2040         u32 data, base;
2041
2042         if (!amdgpu_ngg)
2043                 return 0;
2044
2045         /* Program buffer size */
2046         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2047                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2048         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2049                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
2050         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2051
2052         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2053                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2054         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2055                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2056         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2057
2058         /* Program buffer base address */
2059         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2060         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2061         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2062
2063         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2064         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2065         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2066
2067         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2068         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2069         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2070
2071         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2072         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2073         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2074
2075         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2076         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2077         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2078
2079         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2080         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2081         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2082
2083         /* Clear GDS reserved memory */
2084         r = amdgpu_ring_alloc(ring, 17);
2085         if (r) {
2086                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2087                           ring->name, r);
2088                 return r;
2089         }
2090
2091         gfx_v9_0_write_data_to_reg(ring, 0, false,
2092                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2093                                    (adev->gds.gds_size +
2094                                     adev->gfx.ngg.gds_reserve_size));
2095
2096         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2097         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2098                                 PACKET3_DMA_DATA_DST_SEL(1) |
2099                                 PACKET3_DMA_DATA_SRC_SEL(2)));
2100         amdgpu_ring_write(ring, 0);
2101         amdgpu_ring_write(ring, 0);
2102         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2103         amdgpu_ring_write(ring, 0);
2104         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2105                                 adev->gfx.ngg.gds_reserve_size);
2106
2107         gfx_v9_0_write_data_to_reg(ring, 0, false,
2108                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2109
2110         amdgpu_ring_commit(ring);
2111
2112         return 0;
2113 }
2114
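/*
 * Initialize one compute ring: map the (mec, pipe, queue) triple onto
 * the ring's ME/pipe/queue fields, assign its doorbell and HPD/EOP
 * slot, and hook it up to the matching EOP interrupt source.
 */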
2115 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2116                                       int mec, int pipe, int queue)
2117 {
2118         int r;
2119         unsigned irq_type;
2120         struct amdgpu_ring *ring;
2121
2122         ring = &adev->gfx.compute_ring[ring_id];
2123
2124         /* mec0 is me1 */
2125         ring->me = mec + 1;
2126         ring->pipe = pipe;
2127         ring->queue = queue;
2128
2129         ring->ring_obj = NULL;
2130         ring->use_doorbell = true;
2131         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2132         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2133                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2134         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2135
2136         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2137                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2138                 + ring->pipe;
2139
2140         /* type-2 packets are deprecated on MEC, use type-3 instead */
2141         r = amdgpu_ring_init(adev, ring, 1024,
2142                              &adev->gfx.eop_irq, irq_type);
2143         if (r)
2144                 return r;
2145
2146
2147         return 0;
2148 }
2149
2150 static int gfx_v9_0_sw_init(void *handle)
2151 {
2152         int i, j, k, r, ring_id;
2153         struct amdgpu_ring *ring;
2154         struct amdgpu_kiq *kiq;
2155         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2156
2157         switch (adev->asic_type) {
2158         case CHIP_VEGA10:
2159         case CHIP_VEGA12:
2160         case CHIP_VEGA20:
2161         case CHIP_RAVEN:
2162         case CHIP_ARCTURUS:
2163         case CHIP_RENOIR:
2164                 adev->gfx.mec.num_mec = 2;
2165                 break;
2166         default:
2167                 adev->gfx.mec.num_mec = 1;
2168                 break;
2169         }
2170
2171         adev->gfx.mec.num_pipe_per_mec = 4;
2172         adev->gfx.mec.num_queue_per_pipe = 8;
2173
2174         /* EOP Event */
2175         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2176         if (r)
2177                 return r;
2178
2179         /* Privileged reg */
2180         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2181                               &adev->gfx.priv_reg_irq);
2182         if (r)
2183                 return r;
2184
2185         /* Privileged inst */
2186         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2187                               &adev->gfx.priv_inst_irq);
2188         if (r)
2189                 return r;
2190
2191         /* ECC error */
2192         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2193                               &adev->gfx.cp_ecc_error_irq);
2194         if (r)
2195                 return r;
2196
2197         /* FUE error */
2198         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2199                               &adev->gfx.cp_ecc_error_irq);
2200         if (r)
2201                 return r;
2202
2203         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2204
2205         gfx_v9_0_scratch_init(adev);
2206
2207         r = gfx_v9_0_init_microcode(adev);
2208         if (r) {
2209                 DRM_ERROR("Failed to load gfx firmware!\n");
2210                 return r;
2211         }
2212
2213         r = adev->gfx.rlc.funcs->init(adev);
2214         if (r) {
2215                 DRM_ERROR("Failed to init rlc BOs!\n");
2216                 return r;
2217         }
2218
2219         r = gfx_v9_0_mec_init(adev);
2220         if (r) {
2221                 DRM_ERROR("Failed to init MEC BOs!\n");
2222                 return r;
2223         }
2224
2225         /* set up the gfx ring */
2226         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2227                 ring = &adev->gfx.gfx_ring[i];
2228                 ring->ring_obj = NULL;
2229                 if (!i)
2230                         sprintf(ring->name, "gfx");
2231                 else
2232                         sprintf(ring->name, "gfx_%d", i);
2233                 ring->use_doorbell = true;
2234                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2235                 r = amdgpu_ring_init(adev, ring, 1024,
2236                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2237                 if (r)
2238                         return r;
2239         }
2240
2241         /* set up the compute queues - allocate horizontally across pipes */
2242         ring_id = 0;
2243         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2244                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2245                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2246                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2247                                         continue;
2248
2249                                 r = gfx_v9_0_compute_ring_init(adev,
2250                                                                ring_id,
2251                                                                i, k, j);
2252                                 if (r)
2253                                         return r;
2254
2255                                 ring_id++;
2256                         }
2257                 }
2258         }
2259
2260         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2261         if (r) {
2262                 DRM_ERROR("Failed to init KIQ BOs!\n");
2263                 return r;
2264         }
2265
2266         kiq = &adev->gfx.kiq;
2267         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2268         if (r)
2269                 return r;
2270
2271         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2272         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2273         if (r)
2274                 return r;
2275
2276         adev->gfx.ce_ram_size = 0x8000;
2277
2278         r = gfx_v9_0_gpu_early_init(adev);
2279         if (r)
2280                 return r;
2281
2282         r = gfx_v9_0_ngg_init(adev);
2283         if (r)
2284                 return r;
2285
2286         return 0;
2287 }
2288
2289
2290 static int gfx_v9_0_sw_fini(void *handle)
2291 {
2292         int i;
2293         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2294
2295         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2296                         adev->gfx.ras_if) {
2297                 struct ras_common_if *ras_if = adev->gfx.ras_if;
2298                 struct ras_ih_if ih_info = {
2299                         .head = *ras_if,
2300                 };
2301
2302                 amdgpu_ras_debugfs_remove(adev, ras_if);
2303                 amdgpu_ras_sysfs_remove(adev, ras_if);
2304                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2305                 amdgpu_ras_feature_enable(adev, ras_if, 0);
2306                 kfree(ras_if);
2307         }
2308
2309         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2310                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2311         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2312                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2313
2314         amdgpu_gfx_mqd_sw_fini(adev);
2315         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2316         amdgpu_gfx_kiq_fini(adev);
2317
2318         gfx_v9_0_mec_fini(adev);
2319         gfx_v9_0_ngg_fini(adev);
2320         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2321         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2322                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2323                                 &adev->gfx.rlc.cp_table_gpu_addr,
2324                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2325         }
2326         gfx_v9_0_free_microcode(adev);
2327
2328         return 0;
2329 }
2330
2331
2332 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2333 {
2334         /* TODO */
2335 }
2336
2337 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2338 {
2339         u32 data;
2340
2341         if (instance == 0xffffffff)
2342                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2343         else
2344                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2345
2346         if (se_num == 0xffffffff)
2347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2348         else
2349                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2350
2351         if (sh_num == 0xffffffff)
2352                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2353         else
2354                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2355
2356         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2357 }
2358
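/*
 * Presumably CC_RB_BACKEND_DISABLE carries the fuse-disabled render backends
 * and GC_USER_RB_BACKEND_DISABLE the ones disabled by software; OR-ing the
 * two, masking and inverting gives the bitmap of RBs that are usable on the
 * currently selected SE/SH (summary inferred from the register names).
 */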
2359 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2360 {
2361         u32 data, mask;
2362
2363         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2364         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2365
2366         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2367         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2368
2369         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2370                                          adev->gfx.config.max_sh_per_se);
2371
2372         return (~data) & mask;
2373 }
2374
2375 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2376 {
2377         int i, j;
2378         u32 data;
2379         u32 active_rbs = 0;
2380         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2381                                         adev->gfx.config.max_sh_per_se;
2382
2383         mutex_lock(&adev->grbm_idx_mutex);
2384         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2385                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2386                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2387                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2388                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2389                                                rb_bitmap_width_per_sh);
2390                 }
2391         }
2392         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2393         mutex_unlock(&adev->grbm_idx_mutex);
2394
2395         adev->gfx.config.backend_enable_mask = active_rbs;
2396         adev->gfx.config.num_rbs = hweight32(active_rbs);
2397 }
2398
2399 #define DEFAULT_SH_MEM_BASES    (0x6000)
2400 #define FIRST_COMPUTE_VMID      (8)
2401 #define LAST_COMPUTE_VMID       (16)
2402 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2403 {
2404         int i;
2405         uint32_t sh_mem_config;
2406         uint32_t sh_mem_bases;
2407
2408         /*
2409          * Configure apertures:
2410          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2411          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2412          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (256TB)
2413          */
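        /*
         * SH_MEM_BASES holds the upper 16 bits of the private and shared
         * aperture bases (base = field << 48), so DEFAULT_SH_MEM_BASES =
         * 0x6000 places both apertures at 0x6000'0000'0000'0000, matching
         * the layout sketched above (worked example, not a spec quote).
         */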
2414         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2415
2416         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2417                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2418                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2419
2420         mutex_lock(&adev->srbm_mutex);
2421         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2422                 soc15_grbm_select(adev, 0, 0, 0, i);
2423                 /* CP and shaders */
2424                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2425                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2426         }
2427         soc15_grbm_select(adev, 0, 0, 0, 0);
2428         mutex_unlock(&adev->srbm_mutex);
2429
2430         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2431            access. These should be enabled by FW for target VMIDs. */
2432         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2433                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2434                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2435                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2436                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2437         }
2438 }
2439
2440 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2441 {
2442         int vmid;
2443
2444         /*
2445          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2446          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2447          * the driver can enable them for graphics. VMID0 should maintain
2448          * access so that HWS firmware can save/restore entries.
2449          */
2450         for (vmid = 1; vmid < 16; vmid++) {
2451                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2452                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2453                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2454                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2455         }
2456 }
2457
2458 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2459 {
2460         u32 tmp;
2461         int i;
2462
2463         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2464
2465         gfx_v9_0_tiling_mode_table_init(adev);
2466
2467         gfx_v9_0_setup_rb(adev);
2468         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2469         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2470
2471         /* XXX SH_MEM regs */
2472         /* where to put LDS, scratch, GPUVM in FSA64 space */
2473         mutex_lock(&adev->srbm_mutex);
2474         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2475                 soc15_grbm_select(adev, 0, 0, 0, i);
2476                 /* CP and shaders */
2477                 if (i == 0) {
2478                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2479                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2480                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2481                                             !!amdgpu_noretry);
2482                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2483                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2484                 } else {
2485                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2486                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2487                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2488                                             !!amdgpu_noretry);
2489                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2490                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2491                                 (adev->gmc.private_aperture_start >> 48));
2492                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2493                                 (adev->gmc.shared_aperture_start >> 48));
2494                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2495                 }
2496         }
2497         soc15_grbm_select(adev, 0, 0, 0, 0);
2498
2499         mutex_unlock(&adev->srbm_mutex);
2500
2501         gfx_v9_0_init_compute_vmid(adev);
2502         gfx_v9_0_init_gds_vmid(adev);
2503 }
2504
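/*
 * Poll until the RLC SERDES master-busy bits clear on every SE/SH, and then
 * until the non-CU masters go idle, so that RLC state changes do not race
 * with in-flight serdes transactions (behavioural summary of the loops below).
 */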
2505 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2506 {
2507         u32 i, j, k;
2508         u32 mask;
2509
2510         mutex_lock(&adev->grbm_idx_mutex);
2511         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2512                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2513                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2514                         for (k = 0; k < adev->usec_timeout; k++) {
2515                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2516                                         break;
2517                                 udelay(1);
2518                         }
2519                         if (k == adev->usec_timeout) {
2520                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2521                                                       0xffffffff, 0xffffffff);
2522                                 mutex_unlock(&adev->grbm_idx_mutex);
2523                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2524                                          i, j);
2525                                 return;
2526                         }
2527                 }
2528         }
2529         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2530         mutex_unlock(&adev->grbm_idx_mutex);
2531
2532         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2533                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2534                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2535                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2536         for (k = 0; k < adev->usec_timeout; k++) {
2537                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2538                         break;
2539                 udelay(1);
2540         }
2541 }
2542
2543 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2544                                                bool enable)
2545 {
2546         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2547
2548         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2549         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2550         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2551         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2552
2553         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2554 }
2555
2556 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2557 {
2558         /* csib */
2559         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2560                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2561         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2562                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2563         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2564                         adev->gfx.rlc.clear_state_size);
2565 }
2566
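/*
 * Helper for gfx_v9_1_init_rlc_save_restore_list() below: it walks the
 * indirect part of the register list format, records where each indirect
 * block starts and collects the unique indirect registers it references;
 * each block ends with a 0xFFFFFFFF sentinel (interpretation inferred from
 * the loop structure, not from RLC documentation).
 */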
2567 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2568                                 int indirect_offset,
2569                                 int list_size,
2570                                 int *unique_indirect_regs,
2571                                 int unique_indirect_reg_count,
2572                                 int *indirect_start_offsets,
2573                                 int *indirect_start_offsets_count,
2574                                 int max_start_offsets_count)
2575 {
2576         int idx;
2577
2578         for (; indirect_offset < list_size; indirect_offset++) {
2579                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2580                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2581                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2582
2583                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2584                         indirect_offset += 2;
2585
2586                         /* look for the matching index */
2587                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2588                                 if (unique_indirect_regs[idx] ==
2589                                         register_list_format[indirect_offset] ||
2590                                         !unique_indirect_regs[idx])
2591                                         break;
2592                         }
2593
2594                         BUG_ON(idx >= unique_indirect_reg_count);
2595
2596                         if (!unique_indirect_regs[idx])
2597                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2598
2599                         indirect_offset++;
2600                 }
2601         }
2602 }
2603
2604 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2605 {
2606         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2607         int unique_indirect_reg_count = 0;
2608
2609         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2610         int indirect_start_offsets_count = 0;
2611
2612         int list_size = 0;
2613         int i = 0, j = 0;
2614         u32 tmp = 0;
2615
2616         u32 *register_list_format =
2617                 kmemdup(adev->gfx.rlc.register_list_format,
2618                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2619         if (!register_list_format)
2620                 return -ENOMEM;
2621
2622         /* setup unique_indirect_regs array and indirect_start_offsets array */
2623         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2624         gfx_v9_1_parse_ind_reg_list(register_list_format,
2625                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2626                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2627                                     unique_indirect_regs,
2628                                     unique_indirect_reg_count,
2629                                     indirect_start_offsets,
2630                                     &indirect_start_offsets_count,
2631                                     ARRAY_SIZE(indirect_start_offsets));
2632
2633         /* enable auto inc in case it is disabled */
2634         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2635         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2636         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2637
2638         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2639         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2640                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2641         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2642                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2643                         adev->gfx.rlc.register_restore[i]);
2644
2645         /* load indirect register */
2646         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2647                 adev->gfx.rlc.reg_list_format_start);
2648
2649         /* direct register portion */
2650         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2651                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2652                         register_list_format[i]);
2653
2654         /* indirect register portion */
2655         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2656                 if (register_list_format[i] == 0xFFFFFFFF) {
2657                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2658                         continue;
2659                 }
2660
2661                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2662                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2663
2664                 for (j = 0; j < unique_indirect_reg_count; j++) {
2665                         if (register_list_format[i] == unique_indirect_regs[j]) {
2666                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2667                                 break;
2668                         }
2669                 }
2670
2671                 BUG_ON(j >= unique_indirect_reg_count);
2672
2673                 i++;
2674         }
2675
2676         /* set save/restore list size */
2677         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2678         list_size = list_size >> 1;
2679         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2680                 adev->gfx.rlc.reg_restore_list_size);
2681         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2682
2683         /* write the starting offsets to RLC scratch ram */
2684         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2685                 adev->gfx.rlc.starting_offsets_start);
2686         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2687                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2688                        indirect_start_offsets[i]);
2689
2690         /* load unique indirect regs*/
2691         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2692                 if (unique_indirect_regs[i] != 0) {
2693                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2694                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2695                                unique_indirect_regs[i] & 0x3FFFF);
2696
2697                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2698                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2699                                unique_indirect_regs[i] >> 20);
2700                 }
2701         }
2702
2703         kfree(register_list_format);
2704         return 0;
2705 }
2706
2707 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2708 {
2709         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2710 }
2711
2712 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2713                                              bool enable)
2714 {
2715         uint32_t data = 0;
2716         uint32_t default_data = 0;
2717
2718         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2719         if (enable) {
2720                 /* enable GFXIP control over CGPG */
2721                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2722                 if (default_data != data)
2723                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2724
2725                 /* update status */
2726                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2727                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2728                 if (default_data != data)
2729                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2730         } else {
2731                 /* restore GFXIP control over CGPG */
2732                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2733                 if (default_data != data)
2734                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2735         }
2736 }
2737
2738 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2739 {
2740         uint32_t data = 0;
2741
2742         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2743                               AMD_PG_SUPPORT_GFX_SMG |
2744                               AMD_PG_SUPPORT_GFX_DMG)) {
2745                 /* init IDLE_POLL_COUNT = 0x60 */
2746                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2747                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2748                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2749                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2750
2751                 /* init RLC PG Delay */
2752                 data = 0;
2753                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2754                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2755                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2756                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2757                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2758
2759                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2760                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2761                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2762                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2763
2764                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2765                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2766                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2767                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2768
2769                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2770                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2771
2772                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2773                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2774                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2775
2776                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2777         }
2778 }
2779
2780 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2781                                                 bool enable)
2782 {
2783         uint32_t data = 0;
2784         uint32_t default_data = 0;
2785
2786         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2787         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2788                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2789                              enable ? 1 : 0);
2790         if (default_data != data)
2791                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2792 }
2793
2794 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2795                                                 bool enable)
2796 {
2797         uint32_t data = 0;
2798         uint32_t default_data = 0;
2799
2800         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2801         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2802                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2803                              enable ? 1 : 0);
2804         if (default_data != data)
2805                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2806 }
2807
2808 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2809                                         bool enable)
2810 {
2811         uint32_t data = 0;
2812         uint32_t default_data = 0;
2813
2814         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2815         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2816                              CP_PG_DISABLE,
2817                              enable ? 0 : 1);
2818         if (default_data != data)
2819                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2820 }
2821
2822 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2823                                                 bool enable)
2824 {
2825         uint32_t data, default_data;
2826
2827         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829                              GFX_POWER_GATING_ENABLE,
2830                              enable ? 1 : 0);
2831         if (default_data != data)
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834
2835 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2836                                                 bool enable)
2837 {
2838         uint32_t data, default_data;
2839
2840         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2841         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2842                              GFX_PIPELINE_PG_ENABLE,
2843                              enable ? 1 : 0);
2844         if (default_data != data)
2845                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2846
2847         if (!enable)
2848                 /* read any GFX register to wake up GFX */
2849                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2850 }
2851
2852 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2853                                                        bool enable)
2854 {
2855         uint32_t data, default_data;
2856
2857         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2858         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2859                              STATIC_PER_CU_PG_ENABLE,
2860                              enable ? 1 : 0);
2861         if (default_data != data)
2862                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2863 }
2864
2865 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2866                                                 bool enable)
2867 {
2868         uint32_t data, default_data;
2869
2870         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2871         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2872                              DYN_PER_CU_PG_ENABLE,
2873                              enable ? 1 : 0);
2874         if (default_data != data)
2875                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2876 }
2877
2878 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2879 {
2880         gfx_v9_0_init_csb(adev);
2881
2882         /*
2883          * The RLC save/restore list is supported from RLC v2_1 onward
2884          * and is required by the gfxoff feature.
2885          */
2886         if (adev->gfx.rlc.is_rlc_v2_1) {
2887                 gfx_v9_1_init_rlc_save_restore_list(adev);
2888                 gfx_v9_0_enable_save_restore_machine(adev);
2889         }
2890
2891         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2892                               AMD_PG_SUPPORT_GFX_SMG |
2893                               AMD_PG_SUPPORT_GFX_DMG |
2894                               AMD_PG_SUPPORT_CP |
2895                               AMD_PG_SUPPORT_GDS |
2896                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2897                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2898                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2899                 gfx_v9_0_init_gfx_power_gating(adev);
2900         }
2901 }
2902
2903 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2904 {
2905         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2906         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2907         gfx_v9_0_wait_for_rlc_serdes(adev);
2908 }
2909
2910 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2911 {
2912         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2913         udelay(50);
2914         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2915         udelay(50);
2916 }
2917
2918 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2919 {
2920 #ifdef AMDGPU_RLC_DEBUG_RETRY
2921         u32 rlc_ucode_ver;
2922 #endif
2923
2924         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2925         udelay(50);
2926
2927         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP has been initialized */
2928         if (!(adev->flags & AMD_IS_APU)) {
2929                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2930                 udelay(50);
2931         }
2932
2933 #ifdef AMDGPU_RLC_DEBUG_RETRY
2934         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2935         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2936         if (rlc_ucode_ver == 0x108) {
2937                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2938                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2939                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2940                  * default is 0x9C4 to create a 100us interval */
2941                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2942                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2943                  * to disable the page fault retry interrupts, default is
2944                  * 0x100 (256) */
2945                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2946         }
2947 #endif
2948 }
2949
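/*
 * Legacy (non-PSP) RLC microcode load: the image is streamed dword by dword
 * through the presumably auto-incrementing RLC_GPM_UCODE_ADDR/DATA pair,
 * starting at RLCG_UCODE_LOADING_START_ADDRESS, and ADDR is finally written
 * with the firmware version (summary of the loop below).
 */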
2950 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2951 {
2952         const struct rlc_firmware_header_v2_0 *hdr;
2953         const __le32 *fw_data;
2954         unsigned i, fw_size;
2955
2956         if (!adev->gfx.rlc_fw)
2957                 return -EINVAL;
2958
2959         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2960         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2961
2962         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2963                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2964         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2965
2966         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2967                         RLCG_UCODE_LOADING_START_ADDRESS);
2968         for (i = 0; i < fw_size; i++)
2969                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2970         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2971
2972         return 0;
2973 }
2974
2975 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2976 {
2977         int r;
2978
2979         if (amdgpu_sriov_vf(adev)) {
2980                 gfx_v9_0_init_csb(adev);
2981                 return 0;
2982         }
2983
2984         adev->gfx.rlc.funcs->stop(adev);
2985
2986         /* disable CG */
2987         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2988
2989         gfx_v9_0_init_pg(adev);
2990
2991         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2992                 /* legacy rlc firmware loading */
2993                 r = gfx_v9_0_rlc_load_microcode(adev);
2994                 if (r)
2995                         return r;
2996         }
2997
2998         switch (adev->asic_type) {
2999         case CHIP_RAVEN:
3000         case CHIP_RENOIR:
3001                 if (amdgpu_lbpw == 0)
3002                         gfx_v9_0_enable_lbpw(adev, false);
3003                 else
3004                         gfx_v9_0_enable_lbpw(adev, true);
3005                 break;
3006         case CHIP_VEGA20:
3007                 if (amdgpu_lbpw > 0)
3008                         gfx_v9_0_enable_lbpw(adev, true);
3009                 else
3010                         gfx_v9_0_enable_lbpw(adev, false);
3011                 break;
3012         default:
3013                 break;
3014         }
3015
3016         adev->gfx.rlc.funcs->start(adev);
3017
3018         return 0;
3019 }
3020
3021 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3022 {
3023         int i;
3024         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3025
3026         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3027         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3028         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3029         if (!enable) {
3030                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3031                         adev->gfx.gfx_ring[i].sched.ready = false;
3032         }
3033         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3034         udelay(50);
3035 }
3036
3037 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3038 {
3039         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3040         const struct gfx_firmware_header_v1_0 *ce_hdr;
3041         const struct gfx_firmware_header_v1_0 *me_hdr;
3042         const __le32 *fw_data;
3043         unsigned i, fw_size;
3044
3045         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3046                 return -EINVAL;
3047
3048         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3049                 adev->gfx.pfp_fw->data;
3050         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3051                 adev->gfx.ce_fw->data;
3052         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3053                 adev->gfx.me_fw->data;
3054
3055         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3056         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3057         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3058
3059         gfx_v9_0_cp_gfx_enable(adev, false);
3060
3061         /* PFP */
3062         fw_data = (const __le32 *)
3063                 (adev->gfx.pfp_fw->data +
3064                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3065         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3066         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3067         for (i = 0; i < fw_size; i++)
3068                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3069         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3070
3071         /* CE */
3072         fw_data = (const __le32 *)
3073                 (adev->gfx.ce_fw->data +
3074                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3075         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3076         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3077         for (i = 0; i < fw_size; i++)
3078                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3079         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3080
3081         /* ME */
3082         fw_data = (const __le32 *)
3083                 (adev->gfx.me_fw->data +
3084                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3085         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3086         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3087         for (i = 0; i < fw_size; i++)
3088                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3089         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3090
3091         return 0;
3092 }
3093
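/*
 * Prime gfx ring 0 with the clear-state ("golden") context: PREAMBLE and
 * CONTEXT_CONTROL packets, the SECT_CONTEXT register extents from
 * gfx9_cs_data, then CLEAR_STATE and SET_BASE, so later submissions start
 * from a known register state (summary of the packet sequence below).
 */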
3094 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3095 {
3096         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3097         const struct cs_section_def *sect = NULL;
3098         const struct cs_extent_def *ext = NULL;
3099         int r, i, tmp;
3100
3101         /* init the CP */
3102         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3103         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3104
3105         gfx_v9_0_cp_gfx_enable(adev, true);
3106
3107         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3108         if (r) {
3109                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3110                 return r;
3111         }
3112
3113         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3114         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3115
3116         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3117         amdgpu_ring_write(ring, 0x80000000);
3118         amdgpu_ring_write(ring, 0x80000000);
3119
3120         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3121                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3122                         if (sect->id == SECT_CONTEXT) {
3123                                 amdgpu_ring_write(ring,
3124                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3125                                                ext->reg_count));
3126                                 amdgpu_ring_write(ring,
3127                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3128                                 for (i = 0; i < ext->reg_count; i++)
3129                                         amdgpu_ring_write(ring, ext->extent[i]);
3130                         }
3131                 }
3132         }
3133
3134         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3135         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3136
3137         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3138         amdgpu_ring_write(ring, 0);
3139
3140         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3141         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3142         amdgpu_ring_write(ring, 0x8000);
3143         amdgpu_ring_write(ring, 0x8000);
3144
3145         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3146         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3147                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3148         amdgpu_ring_write(ring, tmp);
3149         amdgpu_ring_write(ring, 0);
3150
3151         amdgpu_ring_commit(ring);
3152
3153         return 0;
3154 }
3155
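/*
 * Program the CP_RB0_* state for the gfx ring: buffer and block size,
 * read/write pointers and their writeback addresses, the ring base and the
 * doorbell window, then call gfx_v9_0_cp_gfx_start() to emit the initial
 * state (overview of the sequence below).
 */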
3156 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3157 {
3158         struct amdgpu_ring *ring;
3159         u32 tmp;
3160         u32 rb_bufsz;
3161         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3162
3163         /* Set the write pointer delay */
3164         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3165
3166         /* set the RB to use vmid 0 */
3167         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3168
3169         /* Set ring buffer size */
3170         ring = &adev->gfx.gfx_ring[0];
3171         rb_bufsz = order_base_2(ring->ring_size / 8);
3172         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3173         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3174 #ifdef __BIG_ENDIAN
3175         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3176 #endif
3177         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3178
3179         /* Initialize the ring buffer's write pointers */
3180         ring->wptr = 0;
3181         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3182         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3183
3184         /* set the wb address whether it's enabled or not */
3185         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3186         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3187         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3188
3189         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3190         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3191         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3192
3193         mdelay(1);
3194         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3195
3196         rb_addr = ring->gpu_addr >> 8;
3197         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3198         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3199
3200         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3201         if (ring->use_doorbell) {
3202                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3203                                     DOORBELL_OFFSET, ring->doorbell_index);
3204                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3205                                     DOORBELL_EN, 1);
3206         } else {
3207                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3208         }
3209         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3210
3211         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3212                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3213         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3214
3215         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3216                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3217
3218
3219         /* start the ring */
3220         gfx_v9_0_cp_gfx_start(adev);
3221         ring->sched.ready = true;
3222
3223         return 0;
3224 }
3225
3226 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3227 {
3228         int i;
3229
3230         if (enable) {
3231                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3232         } else {
3233                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3234                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3235                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3236                         adev->gfx.compute_ring[i].sched.ready = false;
3237                 adev->gfx.kiq.ring.sched.ready = false;
3238         }
3239         udelay(50);
3240 }
3241
3242 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3243 {
3244         const struct gfx_firmware_header_v1_0 *mec_hdr;
3245         const __le32 *fw_data;
3246         unsigned i;
3247         u32 tmp;
3248
3249         if (!adev->gfx.mec_fw)
3250                 return -EINVAL;
3251
3252         gfx_v9_0_cp_compute_enable(adev, false);
3253
3254         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3255         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3256
3257         fw_data = (const __le32 *)
3258                 (adev->gfx.mec_fw->data +
3259                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3260         tmp = 0;
3261         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3262         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3263         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3264
3265         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3266                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3267         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3268                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3269
3270         /* MEC1 */
3271         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3272                          mec_hdr->jt_offset);
3273         for (i = 0; i < mec_hdr->jt_size; i++)
3274                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3275                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3276
3277         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3278                         adev->gfx.mec_fw_version);
3279         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3280
3281         return 0;
3282 }
3283
3284 /* KIQ functions */
3285 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3286 {
3287         uint32_t tmp;
3288         struct amdgpu_device *adev = ring->adev;
3289
3290         /* tell the RLC which queue is the KIQ */
3291         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3292         tmp &= 0xffffff00;
3293         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3294         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3295         tmp |= 0x80;
3296         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3297 }
3298
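/*
 * Hand the compute queues to the hardware scheduler: one SET_RESOURCES
 * packet publishes the queue bitmap, then a MAP_QUEUES packet per compute
 * ring passes the MQD and wptr writeback addresses so the KIQ can map each
 * queue (summary of the packets written below).
 */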
3299 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3300 {
3301         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3302         uint64_t queue_mask = 0;
3303         int r, i;
3304
3305         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3306                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3307                         continue;
3308
3309                 /* This situation may be hit in the future if a new HW
3310                  * generation exposes more than 64 queues. If so, the
3311                  * definition of queue_mask needs updating */
3312                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3313                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3314                         break;
3315                 }
3316
3317                 queue_mask |= (1ull << i);
3318         }
3319
3320         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3321         if (r) {
3322                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3323                 return r;
3324         }
3325
3326         /* set resources */
3327         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3328         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3329                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3330         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3331         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3332         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3333         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3334         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3335         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3336         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3337                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3338                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3339                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3340
3341                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3342                 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
3343                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3344                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3345                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3346                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3347                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3348                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3349                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3350                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3351                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3352                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3353                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3354                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3355                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3356                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3357                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3358         }
3359
3360         r = amdgpu_ring_test_helper(kiq_ring);
3361         if (r)
3362                 DRM_ERROR("KCQ enable failed\n");
3363
3364         return r;
3365 }
3366
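/*
 * Fill the Memory Queue Descriptor for a compute ring: the CP consumes this
 * structure (ring base and size, doorbell setup, read/write-pointer
 * locations) when the queue is mapped, so the values written to the HQD
 * registers later mirror the fields initialized here.
 */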
3367 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3368 {
3369         struct amdgpu_device *adev = ring->adev;
3370         struct v9_mqd *mqd = ring->mqd_ptr;
3371         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3372         uint32_t tmp;
3373
3374         mqd->header = 0xC0310800;
3375         mqd->compute_pipelinestat_enable = 0x00000001;
3376         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3377         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3378         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3379         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3380         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3381         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3382         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3383         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3384         mqd->compute_misc_reserved = 0x00000003;
3385
3386         mqd->dynamic_cu_mask_addr_lo =
3387                 lower_32_bits(ring->mqd_gpu_addr
3388                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3389         mqd->dynamic_cu_mask_addr_hi =
3390                 upper_32_bits(ring->mqd_gpu_addr
3391                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3392
3393         eop_base_addr = ring->eop_gpu_addr >> 8;
3394         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3395         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3396
3397         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3398         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3399         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3400                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
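        /*
         * Worked example, assuming the 2^(EOP_SIZE+1) encoding noted above:
         * GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, order_base_2(1024) - 1
         * = 9, giving a 2^(9+1) = 1024-dword EOP buffer.
         */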
3401
3402         mqd->cp_hqd_eop_control = tmp;
3403
3404         /* enable doorbell? */
3405         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3406
3407         if (ring->use_doorbell) {
3408                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3409                                     DOORBELL_OFFSET, ring->doorbell_index);
3410                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3411                                     DOORBELL_EN, 1);
3412                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3413                                     DOORBELL_SOURCE, 0);
3414                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3415                                     DOORBELL_HIT, 0);
3416         } else {
3417                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3418                                          DOORBELL_EN, 0);
3419         }
3420
3421         mqd->cp_hqd_pq_doorbell_control = tmp;
3422
3423         /* disable the queue if it's active */
3424         ring->wptr = 0;
3425         mqd->cp_hqd_dequeue_request = 0;
3426         mqd->cp_hqd_pq_rptr = 0;
3427         mqd->cp_hqd_pq_wptr_lo = 0;
3428         mqd->cp_hqd_pq_wptr_hi = 0;
3429
3430         /* set the pointer to the MQD */
3431         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3432         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3433
3434         /* set MQD vmid to 0 */
3435         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3436         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3437         mqd->cp_mqd_control = tmp;
3438
3439         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3440         hqd_gpu_addr = ring->gpu_addr >> 8;
3441         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3442         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3443
3444         /* set up the HQD, this is similar to CP_RB0_CNTL */
3445         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3446         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3447                             (order_base_2(ring->ring_size / 4) - 1));
3448         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3449                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3450 #ifdef __BIG_ENDIAN
3451         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3452 #endif
3453         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3454         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3455         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3456         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3457         mqd->cp_hqd_pq_control = tmp;
3458
3459         /* set the wb address whether it's enabled or not */
3460         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3461         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3462         mqd->cp_hqd_pq_rptr_report_addr_hi =
3463                 upper_32_bits(wb_gpu_addr) & 0xffff;
3464
3465         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3466         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3467         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3468         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3469
3470         tmp = 0;
3471         /* enable the doorbell if requested */
3472         if (ring->use_doorbell) {
3473                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3474                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3475                                 DOORBELL_OFFSET, ring->doorbell_index);
3476
3477                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3478                                          DOORBELL_EN, 1);
3479                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3480                                          DOORBELL_SOURCE, 0);
3481                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3482                                          DOORBELL_HIT, 0);
3483         }
3484
3485         mqd->cp_hqd_pq_doorbell_control = tmp;
3486
3487         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3488         ring->wptr = 0;
3489         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3490
3491         /* set the vmid for the queue */
3492         mqd->cp_hqd_vmid = 0;
3493
3494         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3495         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3496         mqd->cp_hqd_persistent_state = tmp;
3497
3498         /* set MIN_IB_AVAIL_SIZE */
3499         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3500         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3501         mqd->cp_hqd_ib_control = tmp;
3502
3503         /* activate the queue */
3504         mqd->cp_hqd_active = 1;
3505
3506         return 0;
3507 }
3508
3509 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3510 {
3511         struct amdgpu_device *adev = ring->adev;
3512         struct v9_mqd *mqd = ring->mqd_ptr;
3513         int j;
3514
3515         /* disable wptr polling */
3516         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3517
3518         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3519                mqd->cp_hqd_eop_base_addr_lo);
3520         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3521                mqd->cp_hqd_eop_base_addr_hi);
3522
3523         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3524         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3525                mqd->cp_hqd_eop_control);
3526
3527         /* enable doorbell? */
3528         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3529                mqd->cp_hqd_pq_doorbell_control);
3530
3531         /* disable the queue if it's active */
3532         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3533                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3534                 for (j = 0; j < adev->usec_timeout; j++) {
3535                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3536                                 break;
3537                         udelay(1);
3538                 }
3539                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3540                        mqd->cp_hqd_dequeue_request);
3541                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3542                        mqd->cp_hqd_pq_rptr);
3543                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3544                        mqd->cp_hqd_pq_wptr_lo);
3545                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3546                        mqd->cp_hqd_pq_wptr_hi);
3547         }
3548
3549         /* set the pointer to the MQD */
3550         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3551                mqd->cp_mqd_base_addr_lo);
3552         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3553                mqd->cp_mqd_base_addr_hi);
3554
3555         /* set MQD vmid to 0 */
3556         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3557                mqd->cp_mqd_control);
3558
3559         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3560         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3561                mqd->cp_hqd_pq_base_lo);
3562         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3563                mqd->cp_hqd_pq_base_hi);
3564
3565         /* set up the HQD, this is similar to CP_RB0_CNTL */
3566         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3567                mqd->cp_hqd_pq_control);
3568
3569         /* set the wb address whether it's enabled or not */
3570         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3571                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3572         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3573                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3574
3575         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3576         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3577                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3578         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3579                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3580
3581         /* enable the doorbell if requested */
3582         if (ring->use_doorbell) {
3583                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3584                                         (adev->doorbell_index.kiq * 2) << 2);
3585                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3586                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3587         }
3588
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3590                mqd->cp_hqd_pq_doorbell_control);
3591
3592         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3594                mqd->cp_hqd_pq_wptr_lo);
3595         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3596                mqd->cp_hqd_pq_wptr_hi);
3597
3598         /* set the vmid for the queue */
3599         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3600
3601         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3602                mqd->cp_hqd_persistent_state);
3603
3604         /* activate the queue */
3605         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3606                mqd->cp_hqd_active);
3607
3608         if (ring->use_doorbell)
3609                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3610
3611         return 0;
3612 }
3613
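/*
 * Tear down the KIQ: request a dequeue, wait for the HQD to go inactive
 * (forcing it off if the request times out) and clear the HQD registers.
 * Like the init path, this runs with the KIQ's me/pipe/queue selected
 * under srbm_mutex.
 */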
3614 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3615 {
3616         struct amdgpu_device *adev = ring->adev;
3617         int j;
3618
3619         /* disable the queue if it's active */
3620         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3621
3622                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3623
3624                 for (j = 0; j < adev->usec_timeout; j++) {
3625                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3626                                 break;
3627                         udelay(1);
3628                 }
3629
3630                 if (j == adev->usec_timeout) {
3631                         DRM_DEBUG("KIQ dequeue request failed.\n");
3632
3633                         /* Manual disable if dequeue request times out */
3634                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3635                 }
3636
3637                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3638                       0);
3639         }
3640
3641         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3642         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3643         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3644         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3645         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3646         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3647         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3648         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3649
3650         return 0;
3651 }
3652
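/*
 * Initialize the KIQ.  On first init the MQD is built from scratch and a
 * backup copy is saved; on GPU reset the backup is restored and only the
 * HQD registers are reprogrammed.
 */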
3653 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3654 {
3655         struct amdgpu_device *adev = ring->adev;
3656         struct v9_mqd *mqd = ring->mqd_ptr;
3657         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3658
3659         gfx_v9_0_kiq_setting(ring);
3660
3661         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3662                 /* reset MQD to a clean status */
3663                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3664                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3665
3666                 /* reset ring buffer */
3667                 ring->wptr = 0;
3668                 amdgpu_ring_clear_ring(ring);
3669
3670                 mutex_lock(&adev->srbm_mutex);
3671                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3672                 gfx_v9_0_kiq_init_register(ring);
3673                 soc15_grbm_select(adev, 0, 0, 0, 0);
3674                 mutex_unlock(&adev->srbm_mutex);
3675         } else {
3676                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3677                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3678                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3679                 mutex_lock(&adev->srbm_mutex);
3680                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3681                 gfx_v9_0_mqd_init(ring);
3682                 gfx_v9_0_kiq_init_register(ring);
3683                 soc15_grbm_select(adev, 0, 0, 0, 0);
3684                 mutex_unlock(&adev->srbm_mutex);
3685
3686                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3687                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3688         }
3689
3690         return 0;
3691 }
3692
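/*
 * Initialize a compute queue (KCQ) MQD.  Unlike the KIQ, the HQD registers
 * are not written directly here; the queues are mapped later through the
 * KIQ in gfx_v9_0_kiq_kcq_enable().
 */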
3693 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3694 {
3695         struct amdgpu_device *adev = ring->adev;
3696         struct v9_mqd *mqd = ring->mqd_ptr;
3697         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3698
3699         if (!adev->in_gpu_reset && !adev->in_suspend) {
3700                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3701                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3702                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3703                 mutex_lock(&adev->srbm_mutex);
3704                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3705                 gfx_v9_0_mqd_init(ring);
3706                 soc15_grbm_select(adev, 0, 0, 0, 0);
3707                 mutex_unlock(&adev->srbm_mutex);
3708
3709                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3710                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3711         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3712                 /* reset MQD to a clean status */
3713                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3714                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3715
3716                 /* reset ring buffer */
3717                 ring->wptr = 0;
3718                 amdgpu_ring_clear_ring(ring);
3719         } else {
3720                 amdgpu_ring_clear_ring(ring);
3721         }
3722
3723         return 0;
3724 }
3725
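/* Map the KIQ MQD BO, (re)initialize the KIQ and mark its ring ready. */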
3726 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3727 {
3728         struct amdgpu_ring *ring;
3729         int r;
3730
3731         ring = &adev->gfx.kiq.ring;
3732
3733         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3734         if (unlikely(r != 0))
3735                 return r;
3736
3737         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3738         if (unlikely(r != 0)) {
3739                 amdgpu_bo_unreserve(ring->mqd_obj);
3740                 return r;
3741         }
3740
3741         gfx_v9_0_kiq_init_queue(ring);
3742         amdgpu_bo_kunmap(ring->mqd_obj);
3743         ring->mqd_ptr = NULL;
3744         amdgpu_bo_unreserve(ring->mqd_obj);
3745         ring->sched.ready = true;
3746         return 0;
3747 }
3748
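/*
 * Bring up the compute queues: enable the MEC, initialize every KCQ's MQD
 * and then ask the KIQ to map them onto the hardware.
 */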
3749 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3750 {
3751         struct amdgpu_ring *ring = NULL;
3752         int r = 0, i;
3753
3754         gfx_v9_0_cp_compute_enable(adev, true);
3755
3756         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3757                 ring = &adev->gfx.compute_ring[i];
3758
3759                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3760                 if (unlikely(r != 0))
3761                         goto done;
3762                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3763                 if (!r) {
3764                         r = gfx_v9_0_kcq_init_queue(ring);
3765                         amdgpu_bo_kunmap(ring->mqd_obj);
3766                         ring->mqd_ptr = NULL;
3767                 }
3768                 amdgpu_bo_unreserve(ring->mqd_obj);
3769                 if (r)
3770                         goto done;
3771         }
3772
3773         r = gfx_v9_0_kiq_kcq_enable(adev);
3774 done:
3775         return r;
3776 }
3777
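/*
 * Top-level CP bring-up: load the CP microcode when the PSP is not doing
 * firmware loading, resume the KIQ, the GFX ring (skipped on Arcturus,
 * which has no GFX rings) and the compute rings, then ring-test everything.
 */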
3778 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3779 {
3780         int r, i;
3781         struct amdgpu_ring *ring;
3782
3783         if (!(adev->flags & AMD_IS_APU))
3784                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3785
3786         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3787                 if (adev->asic_type != CHIP_ARCTURUS) {
3788                         /* legacy firmware loading */
3789                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3790                         if (r)
3791                                 return r;
3792                 }
3793
3794                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3795                 if (r)
3796                         return r;
3797         }
3798
3799         r = gfx_v9_0_kiq_resume(adev);
3800         if (r)
3801                 return r;
3802
3803         if (adev->asic_type != CHIP_ARCTURUS) {
3804                 r = gfx_v9_0_cp_gfx_resume(adev);
3805                 if (r)
3806                         return r;
3807         }
3808
3809         r = gfx_v9_0_kcq_resume(adev);
3810         if (r)
3811                 return r;
3812
3813         if (adev->asic_type != CHIP_ARCTURUS) {
3814                 ring = &adev->gfx.gfx_ring[0];
3815                 r = amdgpu_ring_test_helper(ring);
3816                 if (r)
3817                         return r;
3818         }
3819
3820         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3821                 ring = &adev->gfx.compute_ring[i];
3822                 amdgpu_ring_test_helper(ring);
3823         }
3824
3825         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3826
3827         return 0;
3828 }
3829
3830 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3831 {
3832         if (adev->asic_type != CHIP_ARCTURUS)
3833                 gfx_v9_0_cp_gfx_enable(adev, enable);
3834         gfx_v9_0_cp_compute_enable(adev, enable);
3835 }
3836
3837 static int gfx_v9_0_hw_init(void *handle)
3838 {
3839         int r;
3840         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3841
3842         if (!amdgpu_sriov_vf(adev))
3843                 gfx_v9_0_init_golden_registers(adev);
3844
3845         gfx_v9_0_constants_init(adev);
3846
3847         r = gfx_v9_0_csb_vram_pin(adev);
3848         if (r)
3849                 return r;
3850
3851         r = adev->gfx.rlc.funcs->resume(adev);
3852         if (r)
3853                 return r;
3854
3855         r = gfx_v9_0_cp_resume(adev);
3856         if (r)
3857                 return r;
3858
3859         if (adev->asic_type != CHIP_ARCTURUS) {
3860                 r = gfx_v9_0_ngg_en(adev);
3861                 if (r)
3862                         return r;
3863         }
3864
3865         return r;
3866 }
3867
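/*
 * Unmap all compute queues by emitting one PACKET3_UNMAP_QUEUES
 * (RESET_QUEUES action) per KCQ on the KIQ ring.
 */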
3868 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3869 {
3870         int r, i;
3871         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3872
3873         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3874         if (r)
3875                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3876
3877         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3878                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3879
3880                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3881                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3882                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3883                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3884                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3885                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3886                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3887                 amdgpu_ring_write(kiq_ring, 0);
3888                 amdgpu_ring_write(kiq_ring, 0);
3889                 amdgpu_ring_write(kiq_ring, 0);
3890         }
3891         r = amdgpu_ring_test_helper(kiq_ring);
3892         if (r)
3893                 DRM_ERROR("KCQ disable failed\n");
3894
3895         return r;
3896 }
3897
3898 static int gfx_v9_0_hw_fini(void *handle)
3899 {
3900         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3901
3902         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3903         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3904         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3905
3906         /* disable KCQ so the CPC stops touching memory that is no longer valid */
3907         gfx_v9_0_kcq_disable(adev);
3908
3909         if (amdgpu_sriov_vf(adev)) {
3910                 gfx_v9_0_cp_gfx_enable(adev, false);
3911                 /* must disable polling for SRIOV when hw is finished, otherwise
3912                  * the CPC engine may keep fetching the WB address, which is already
3913                  * invalid once sw is finished, and trigger DMAR read errors on the
3914                  * hypervisor side.
3915                  */
3916                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3917                 return 0;
3918         }
3919
3920         /* Use the deinitialize sequence from CAIL when unbinding the device
3921          * from the driver, otherwise KIQ hangs when binding back.
3922          */
3923         if (!adev->in_gpu_reset && !adev->in_suspend) {
3924                 mutex_lock(&adev->srbm_mutex);
3925                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3926                                 adev->gfx.kiq.ring.pipe,
3927                                 adev->gfx.kiq.ring.queue, 0);
3928                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3929                 soc15_grbm_select(adev, 0, 0, 0, 0);
3930                 mutex_unlock(&adev->srbm_mutex);
3931         }
3932
3933         gfx_v9_0_cp_enable(adev, false);
3934         adev->gfx.rlc.funcs->stop(adev);
3935
3936         gfx_v9_0_csb_vram_unpin(adev);
3937
3938         return 0;
3939 }
3940
3941 static int gfx_v9_0_suspend(void *handle)
3942 {
3943         return gfx_v9_0_hw_fini(handle);
3944 }
3945
3946 static int gfx_v9_0_resume(void *handle)
3947 {
3948         return gfx_v9_0_hw_init(handle);
3949 }
3950
3951 static bool gfx_v9_0_is_idle(void *handle)
3952 {
3953         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3954
3955         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3956                                 GRBM_STATUS, GUI_ACTIVE))
3957                 return false;
3958         else
3959                 return true;
3960 }
3961
3962 static int gfx_v9_0_wait_for_idle(void *handle)
3963 {
3964         unsigned i;
3965         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3966
3967         for (i = 0; i < adev->usec_timeout; i++) {
3968                 if (gfx_v9_0_is_idle(handle))
3969                         return 0;
3970                 udelay(1);
3971         }
3972         return -ETIMEDOUT;
3973 }
3974
3975 static int gfx_v9_0_soft_reset(void *handle)
3976 {
3977         u32 grbm_soft_reset = 0;
3978         u32 tmp;
3979         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3980
3981         /* GRBM_STATUS */
3982         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3983         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3984                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3985                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3986                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3987                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3988                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3989                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3990                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3991                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3992                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3993         }
3994
3995         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3996                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3997                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3998         }
3999
4000         /* GRBM_STATUS2 */
4001         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4002         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4003                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4004                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4005
4006
4007         if (grbm_soft_reset) {
4008                 /* stop the rlc */
4009                 adev->gfx.rlc.funcs->stop(adev);
4010
4011                 if (adev->asic_type != CHIP_ARCTURUS)
4012                         /* Disable GFX parsing/prefetching */
4013                         gfx_v9_0_cp_gfx_enable(adev, false);
4014
4015                 /* Disable MEC parsing/prefetching */
4016                 gfx_v9_0_cp_compute_enable(adev, false);
4017
4018                 if (grbm_soft_reset) {
4019                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4020                         tmp |= grbm_soft_reset;
4021                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4022                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4023                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4024
4025                         udelay(50);
4026
4027                         tmp &= ~grbm_soft_reset;
4028                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4029                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4030                 }
4031
4032                 /* Wait a little for things to settle down */
4033                 udelay(50);
4034         }
4035         return 0;
4036 }
4037
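/*
 * Read the free-running GPU clock counter: latch the value with
 * RLC_CAPTURE_GPU_CLOCK_COUNT, then read the 64-bit count in two halves.
 */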
4038 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4039 {
4040         uint64_t clock;
4041
4042         mutex_lock(&adev->gfx.gpu_clock_mutex);
4043         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4044         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4045                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4046         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4047         return clock;
4048 }
4049
4050 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4051                                           uint32_t vmid,
4052                                           uint32_t gds_base, uint32_t gds_size,
4053                                           uint32_t gws_base, uint32_t gws_size,
4054                                           uint32_t oa_base, uint32_t oa_size)
4055 {
4056         struct amdgpu_device *adev = ring->adev;
4057
4058         /* GDS Base */
4059         gfx_v9_0_write_data_to_reg(ring, 0, false,
4060                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4061                                    gds_base);
4062
4063         /* GDS Size */
4064         gfx_v9_0_write_data_to_reg(ring, 0, false,
4065                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4066                                    gds_size);
4067
4068         /* GWS */
4069         gfx_v9_0_write_data_to_reg(ring, 0, false,
4070                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4071                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4072
4073         /* OA */
4074         gfx_v9_0_write_data_to_reg(ring, 0, false,
4075                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4076                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4077 }
4078
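/*
 * Precompiled GFX9 shader binaries (raw dwords) used by
 * gfx_v9_0_do_edc_gpr_workarounds() to initialize the VGPRs and SGPRs.
 */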
4079 static const u32 vgpr_init_compute_shader[] =
4080 {
4081         0xb07c0000, 0xbe8000ff,
4082         0x000000f8, 0xbf110800,
4083         0x7e000280, 0x7e020280,
4084         0x7e040280, 0x7e060280,
4085         0x7e080280, 0x7e0a0280,
4086         0x7e0c0280, 0x7e0e0280,
4087         0x80808800, 0xbe803200,
4088         0xbf84fff5, 0xbf9c0000,
4089         0xd28c0001, 0x0001007f,
4090         0xd28d0001, 0x0002027e,
4091         0x10020288, 0xb8810904,
4092         0xb7814000, 0xd1196a01,
4093         0x00000301, 0xbe800087,
4094         0xbefc00c1, 0xd89c4000,
4095         0x00020201, 0xd89cc080,
4096         0x00040401, 0x320202ff,
4097         0x00000800, 0x80808100,
4098         0xbf84fff8, 0x7e020280,
4099         0xbf810000, 0x00000000,
4100 };
4101
4102 static const u32 sgpr_init_compute_shader[] =
4103 {
4104         0xb07c0000, 0xbe8000ff,
4105         0x0000005f, 0xbee50080,
4106         0xbe812c65, 0xbe822c65,
4107         0xbe832c65, 0xbe842c65,
4108         0xbe852c65, 0xb77c0005,
4109         0x80808500, 0xbf84fff8,
4110         0xbe800080, 0xbf810000,
4111 };
4112
4113 static const struct soc15_reg_entry vgpr_init_regs[] = {
4114    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4115    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4116    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4117    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4118    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4119    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4120    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4121    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4122    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4123    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4124 };
4125
4126 static const struct soc15_reg_entry sgpr_init_regs[] = {
4127    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4128    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4129    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4130    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4131    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4132    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4133    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4134    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4135    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4136    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4137 };
4138
4139 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4140    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4141    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4142    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4143    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4144    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4145    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4146    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4147    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4148    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4149    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4150    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4151    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4152    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4153    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4154    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4155    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4156    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4157    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4158    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4159    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4160    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4161    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4162    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4163    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4164    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4165    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4166    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4167    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4168    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4169    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4170    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4171    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4172 };
4173
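/*
 * EDC workaround for GDS: DMA a fill across the whole GDS aperture so the
 * memory (and, presumably, its ECC check bits) starts from a known state
 * before the RAS error counters are relied on.
 */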
4174 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4175 {
4176         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4177         int i, r;
4178
4179         r = amdgpu_ring_alloc(ring, 7);
4180         if (r) {
4181                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4182                         ring->name, r);
4183                 return r;
4184         }
4185
4186         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4187         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4188
4189         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4190         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4191                                 PACKET3_DMA_DATA_DST_SEL(1) |
4192                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4193                                 PACKET3_DMA_DATA_ENGINE(0)));
4194         amdgpu_ring_write(ring, 0);
4195         amdgpu_ring_write(ring, 0);
4196         amdgpu_ring_write(ring, 0);
4197         amdgpu_ring_write(ring, 0);
4198         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4199                                 adev->gds.gds_size);
4200
4201         amdgpu_ring_commit(ring);
4202
4203         for (i = 0; i < adev->usec_timeout; i++) {
4204                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4205                         break;
4206                 udelay(1);
4207         }
4208
4209         if (i >= adev->usec_timeout)
4210                 r = -ETIMEDOUT;
4211
4212         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4213
4214         return r;
4215 }
4216
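/*
 * EDC workaround for the GPRs: build an IB that dispatches the VGPR and
 * SGPR init shaders above across all CUs, then read the SEC/DED counter
 * registers back once to clear any stale counts.
 */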
4217 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4218 {
4219         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4220         struct amdgpu_ib ib;
4221         struct dma_fence *f = NULL;
4222         int r, i, j, k;
4223         unsigned total_size, vgpr_offset, sgpr_offset;
4224         u64 gpu_addr;
4225
4226         /* only supported when RAS is enabled */
4227         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4228                 return 0;
4229
4230         /* bail if the compute ring is not ready */
4231         if (!ring->sched.ready)
4232                 return 0;
4233
4234         total_size =
4235                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4236         total_size +=
4237                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4238         total_size = ALIGN(total_size, 256);
4239         vgpr_offset = total_size;
4240         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4241         sgpr_offset = total_size;
4242         total_size += sizeof(sgpr_init_compute_shader);
4243
4244         /* allocate an indirect buffer to put the commands in */
4245         memset(&ib, 0, sizeof(ib));
4246         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4247         if (r) {
4248                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4249                 return r;
4250         }
4251
4252         /* load the compute shaders */
4253         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4254                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4255
4256         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4257                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4258
4259         /* init the ib length to 0 */
4260         ib.length_dw = 0;
4261
4262         /* VGPR */
4263         /* write the register state for the compute dispatch */
4264         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4265                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4266                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4267                                                                 - PACKET3_SET_SH_REG_START;
4268                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4269         }
4270         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4271         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4272         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4273         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4274                                                         - PACKET3_SET_SH_REG_START;
4275         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4276         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4277
4278         /* write dispatch packet */
4279         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4280         ib.ptr[ib.length_dw++] = 128; /* x */
4281         ib.ptr[ib.length_dw++] = 1; /* y */
4282         ib.ptr[ib.length_dw++] = 1; /* z */
4283         ib.ptr[ib.length_dw++] =
4284                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4285
4286         /* write CS partial flush packet */
4287         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4288         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4289
4290         /* SGPR */
4291         /* write the register state for the compute dispatch */
4292         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4293                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4294                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4295                                                                 - PACKET3_SET_SH_REG_START;
4296                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4297         }
4298         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4299         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4300         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4301         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4302                                                         - PACKET3_SET_SH_REG_START;
4303         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4304         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4305
4306         /* write dispatch packet */
4307         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4308         ib.ptr[ib.length_dw++] = 128; /* x */
4309         ib.ptr[ib.length_dw++] = 1; /* y */
4310         ib.ptr[ib.length_dw++] = 1; /* z */
4311         ib.ptr[ib.length_dw++] =
4312                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4313
4314         /* write CS partial flush packet */
4315         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4316         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4317
4318         /* schedule the ib on the ring */
4319         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4320         if (r) {
4321                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4322                 goto fail;
4323         }
4324
4325         /* wait for the GPU to finish processing the IB */
4326         r = dma_fence_wait(f, false);
4327         if (r) {
4328                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4329                 goto fail;
4330         }
4331
4332         /* read back registers to clear the counters */
4333         mutex_lock(&adev->grbm_idx_mutex);
4334         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4335                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4336                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4337                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4338                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4339                         }
4340                 }
4341         }
4342         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4343         mutex_unlock(&adev->grbm_idx_mutex);
4344
4345 fail:
4346         amdgpu_ib_free(adev, &ib, NULL);
4347         dma_fence_put(f);
4348
4349         return r;
4350 }
4351
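/*
 * Early init: choose the ring counts per ASIC and install the ring, IRQ,
 * GDS and RLC function/config tables.
 */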
4352 static int gfx_v9_0_early_init(void *handle)
4353 {
4354         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4355
4356         if (adev->asic_type == CHIP_ARCTURUS)
4357                 adev->gfx.num_gfx_rings = 0;
4358         else
4359                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4360         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4361         gfx_v9_0_set_ring_funcs(adev);
4362         gfx_v9_0_set_irq_funcs(adev);
4363         gfx_v9_0_set_gds_init(adev);
4364         gfx_v9_0_set_rlc_funcs(adev);
4365
4366         return 0;
4367 }
4368
4369 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4370                 struct ras_err_data *err_data,
4371                 struct amdgpu_iv_entry *entry);
4372
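/*
 * Late RAS/ECC init for GFX: run the EDC workarounds, enable the RAS
 * feature for the GFX block, and on first init register the interrupt
 * handler plus debugfs/sysfs nodes; on resume only re-enable the feature
 * and the ECC error interrupt.
 */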
4373 static int gfx_v9_0_ecc_late_init(void *handle)
4374 {
4375         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4376         struct ras_common_if **ras_if = &adev->gfx.ras_if;
4377         struct ras_ih_if ih_info = {
4378                 .cb = gfx_v9_0_process_ras_data_cb,
4379         };
4380         struct ras_fs_if fs_info = {
4381                 .sysfs_name = "gfx_err_count",
4382                 .debugfs_name = "gfx_err_inject",
4383         };
4384         struct ras_common_if ras_block = {
4385                 .block = AMDGPU_RAS_BLOCK__GFX,
4386                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4387                 .sub_block_index = 0,
4388                 .name = "gfx",
4389         };
4390         int r;
4391
4392         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4393                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4394                 return 0;
4395         }
4396
4397         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4398         if (r)
4399                 return r;
4400
4401         /* requires IBs so do in late init after IB pool is initialized */
4402         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4403         if (r)
4404                 return r;
4405
4406         /* handle resume path. */
4407         if (*ras_if) {
4408                 /* resend ras TA enable cmd during resume.
4409                  * prepare to handle failure.
4410                  */
4411                 ih_info.head = **ras_if;
4412                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4413                 if (r) {
4414                         if (r == -EAGAIN) {
4415                                 /* request a gpu reset. will run again. */
4416                                 amdgpu_ras_request_reset_on_boot(adev,
4417                                                 AMDGPU_RAS_BLOCK__GFX);
4418                                 return 0;
4419                         }
4420                         /* failed to enable ras, clean up everything. */
4421                         goto irq;
4422                 }
4423                 /* enabled successfully, continue. */
4424                 goto resume;
4425         }
4426
4427         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4428         if (!*ras_if)
4429                 return -ENOMEM;
4430
4431         **ras_if = ras_block;
4432
4433         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4434         if (r) {
4435                 if (r == -EAGAIN) {
4436                         amdgpu_ras_request_reset_on_boot(adev,
4437                                         AMDGPU_RAS_BLOCK__GFX);
4438                         r = 0;
4439                 }
4440                 goto feature;
4441         }
4442
4443         ih_info.head = **ras_if;
4444         fs_info.head = **ras_if;
4445
4446         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4447         if (r)
4448                 goto interrupt;
4449
4450         amdgpu_ras_debugfs_create(adev, &fs_info);
4451
4452         r = amdgpu_ras_sysfs_create(adev, &fs_info);
4453         if (r)
4454                 goto sysfs;
4455 resume:
4456         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4457         if (r)
4458                 goto irq;
4459
4460         return 0;
4461 irq:
4462         amdgpu_ras_sysfs_remove(adev, *ras_if);
4463 sysfs:
4464         amdgpu_ras_debugfs_remove(adev, *ras_if);
4465         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4466 interrupt:
4467         amdgpu_ras_feature_enable(adev, *ras_if, 0);
4468 feature:
4469         kfree(*ras_if);
4470         *ras_if = NULL;
4471         return r;
4472 }
4473
4474 static int gfx_v9_0_late_init(void *handle)
4475 {
4476         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4477         int r;
4478
4479         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4480         if (r)
4481                 return r;
4482
4483         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4484         if (r)
4485                 return r;
4486
4487         r = gfx_v9_0_ecc_late_init(handle);
4488         if (r)
4489                 return r;
4490
4491         return 0;
4492 }
4493
4494 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4495 {
4496         uint32_t rlc_setting;
4497
4498         /* if RLC is not enabled, do nothing */
4499         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4500         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4501                 return false;
4502
4503         return true;
4504 }
4505
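/*
 * Ask the RLC to enter safe mode: write the command to RLC_SAFE_MODE and
 * poll until the CMD field clears, i.e. the RLC has consumed the request.
 */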
4506 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4507 {
4508         uint32_t data;
4509         unsigned i;
4510
4511         data = RLC_SAFE_MODE__CMD_MASK;
4512         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4513         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4514
4515         /* wait for RLC_SAFE_MODE */
4516         for (i = 0; i < adev->usec_timeout; i++) {
4517                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4518                         break;
4519                 udelay(1);
4520         }
4521 }
4522
4523 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4524 {
4525         uint32_t data;
4526
4527         data = RLC_SAFE_MODE__CMD_MASK;
4528         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4529 }
4530
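/*
 * Toggle coarse-grain GFX power gating (and pipeline power gating when
 * supported) while the RLC is held in safe mode.
 */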
4531 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4532                                                 bool enable)
4533 {
4534         amdgpu_gfx_rlc_enter_safe_mode(adev);
4535
4536         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4537                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4538                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4539                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4540         } else {
4541                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4542                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4543         }
4544
4545         amdgpu_gfx_rlc_exit_safe_mode(adev);
4546 }
4547
4548 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4549                                                 bool enable)
4550 {
4551         /* TODO: double check if we need to perform under safe mode */
4552         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4553
4554         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4555                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4556         else
4557                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4558
4559         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4560                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4561         else
4562                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4563
4564         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4565 }
4566
4567 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4568                                                       bool enable)
4569 {
4570         uint32_t data, def;
4571
4572         amdgpu_gfx_rlc_enter_safe_mode(adev);
4573
4574         /* It is disabled by HW by default */
4575         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4576                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4577                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4578
4579                 if (adev->asic_type != CHIP_VEGA12)
4580                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4581
4582                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4583                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4584                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4585
4586                 /* only for Vega10 & Raven1 */
4587                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4588
4589                 if (def != data)
4590                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4591
4592                 /* MGLS is a global flag to control all MGLS in GFX */
4593                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4594                         /* 2 - RLC memory Light sleep */
4595                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4596                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4597                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4598                                 if (def != data)
4599                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4600                         }
4601                         /* 3 - CP memory Light sleep */
4602                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4603                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4604                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4605                                 if (def != data)
4606                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4607                         }
4608                 }
4609         } else {
4610                 /* 1 - MGCG_OVERRIDE */
4611                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4612
4613                 if (adev->asic_type != CHIP_VEGA12)
4614                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4615
4616                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4617                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4618                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4619                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4620
4621                 if (def != data)
4622                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4623
4624                 /* 2 - disable MGLS in RLC */
4625                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4626                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4627                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4628                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4629                 }
4630
4631                 /* 3 - disable MGLS in CP */
4632                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4633                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4634                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4635                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4636                 }
4637         }
4638
4639         amdgpu_gfx_rlc_exit_safe_mode(adev);
4640 }
4641
4642 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4643                                            bool enable)
4644 {
4645         uint32_t data, def;
4646
4647         if (adev->asic_type == CHIP_ARCTURUS)
4648                 return;
4649
4650         amdgpu_gfx_rlc_enter_safe_mode(adev);
4651
4652         /* Enable 3D CGCG/CGLS */
4653         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4654                 /* write cmd to clear cgcg/cgls ov */
4655                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4656                 /* unset CGCG override */
4657                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4658                 /* update CGCG and CGLS override bits */
4659                 if (def != data)
4660                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4661
4662                 /* enable 3Dcgcg FSM(0x0000363f) */
4663                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4664
4665                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4666                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4667                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4668                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4669                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4670                 if (def != data)
4671                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4672
4673                 /* set IDLE_POLL_COUNT(0x00900100) */
4674                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4675                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4676                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4677                 if (def != data)
4678                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4679         } else {
4680                 /* Disable CGCG/CGLS */
4681                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4682                 /* disable cgcg, cgls should be disabled */
4683                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4684                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4685                 /* disable cgcg and cgls in FSM */
4686                 if (def != data)
4687                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4688         }
4689
4690         amdgpu_gfx_rlc_exit_safe_mode(adev);
4691 }
4692
4693 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4694                                                       bool enable)
4695 {
4696         uint32_t def, data;
4697
4698         amdgpu_gfx_rlc_enter_safe_mode(adev);
4699
4700         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4701                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4702                 /* unset CGCG override */
4703                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4704                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4705                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4706                 else
4707                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4708                 /* update CGCG and CGLS override bits */
4709                 if (def != data)
4710                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4711
4712                 /* enable cgcg FSM(0x0000363F) */
4713                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4714
4715                 if (adev->asic_type == CHIP_ARCTURUS)
4716                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4717                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4718                 else
4719                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4720                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4721                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4722                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4723                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4724                 if (def != data)
4725                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4726
4727                 /* set IDLE_POLL_COUNT(0x00900100) */
4728                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4729                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4730                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4731                 if (def != data)
4732                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4733         } else {
4734                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4735                 /* reset CGCG/CGLS bits */
4736                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4737                 /* disable cgcg and cgls in FSM */
4738                 if (def != data)
4739                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4740         }
4741
4742         amdgpu_gfx_rlc_exit_safe_mode(adev);
4743 }
4744
4745 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4746                                             bool enable)
4747 {
4748         if (enable) {
4749                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4750                  * ===  MGCG + MGLS ===
4751                  */
4752                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4753                 /* ===  CGCG /CGLS for GFX 3D Only === */
4754                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4755                 /* ===  CGCG + CGLS === */
4756                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4757         } else {
4758                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4759                  * ===  CGCG + CGLS ===
4760                  */
4761                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4762                 /* ===  CGCG /CGLS for GFX 3D Only === */
4763                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4764                 /* ===  MGCG + MGLS === */
4765                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4766         }
4767         return 0;
4768 }
4769
4770 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4771         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4772         .set_safe_mode = gfx_v9_0_set_safe_mode,
4773         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4774         .init = gfx_v9_0_rlc_init,
4775         .get_csb_size = gfx_v9_0_get_csb_size,
4776         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4777         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4778         .resume = gfx_v9_0_rlc_resume,
4779         .stop = gfx_v9_0_rlc_stop,
4780         .reset = gfx_v9_0_rlc_reset,
4781         .start = gfx_v9_0_rlc_start
4782 };
4783
4784 static int gfx_v9_0_set_powergating_state(void *handle,
4785                                           enum amd_powergating_state state)
4786 {
4787         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4788         bool enable = (state == AMD_PG_STATE_GATE);
4789
4790         switch (adev->asic_type) {
4791         case CHIP_RAVEN:
4792                 if (!enable) {
4793                         amdgpu_gfx_off_ctrl(adev, false);
4794                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4795                 }
4796                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4797                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4798                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4799                 } else {
4800                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4801                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4802                 }
4803
4804                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4805                         gfx_v9_0_enable_cp_power_gating(adev, true);
4806                 else
4807                         gfx_v9_0_enable_cp_power_gating(adev, false);
4808
4809                 /* update gfx cgpg state */
4810                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4811
4812                 /* update mgcg state */
4813                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4814
4815                 if (enable)
4816                         amdgpu_gfx_off_ctrl(adev, true);
4817                 break;
4818         case CHIP_VEGA12:
4819                 if (!enable) {
4820                         amdgpu_gfx_off_ctrl(adev, false);
4821                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4822                 } else {
4823                         amdgpu_gfx_off_ctrl(adev, true);
4824                 }
4825                 break;
4826         default:
4827                 break;
4828         }
4829
4830         return 0;
4831 }
4832
4833 static int gfx_v9_0_set_clockgating_state(void *handle,
4834                                           enum amd_clockgating_state state)
4835 {
4836         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837
4838         if (amdgpu_sriov_vf(adev))
4839                 return 0;
4840
4841         switch (adev->asic_type) {
4842         case CHIP_VEGA10:
4843         case CHIP_VEGA12:
4844         case CHIP_VEGA20:
4845         case CHIP_RAVEN:
4846         case CHIP_ARCTURUS:
4847                 gfx_v9_0_update_gfx_clock_gating(adev,
4848                                                  state == AMD_CG_STATE_GATE);
4849                 break;
4850         default:
4851                 break;
4852         }
4853         return 0;
4854 }
4855
4856 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4857 {
4858         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4859         int data;
4860
4861         if (amdgpu_sriov_vf(adev))
4862                 *flags = 0;
4863
4864         /* AMD_CG_SUPPORT_GFX_MGCG */
4865         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4866         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4867                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4868
4869         /* AMD_CG_SUPPORT_GFX_CGCG */
4870         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4871         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4872                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4873
4874         /* AMD_CG_SUPPORT_GFX_CGLS */
4875         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4876                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4877
4878         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4879         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4880         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4881                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4882
4883         /* AMD_CG_SUPPORT_GFX_CP_LS */
4884         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4885         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4886                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4887
4888         if (adev->asic_type != CHIP_ARCTURUS) {
4889                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4890                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4891                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4892                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4893
4894                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4895                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4896                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4897         }
4898 }
4899
4900 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4901 {
4902         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4903 }
4904
4905 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4906 {
4907         struct amdgpu_device *adev = ring->adev;
4908         u64 wptr;
4909
4910         /* XXX check if swapping is necessary on BE */
4911         if (ring->use_doorbell) {
4912                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4913         } else {
4914                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4915                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4916         }
4917
4918         return wptr;
4919 }
4920
4921 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4922 {
4923         struct amdgpu_device *adev = ring->adev;
4924
4925         if (ring->use_doorbell) {
4926                 /* XXX check if swapping is necessary on BE */
4927                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4928                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4929         } else {
4930                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4931                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4932         }
4933 }
4934
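     /*
      * Emit an HDP flush on this ring: pick the NBIO flush-request bit for
      * the ring's CP engine/pipe, then use WAIT_REG_MEM to write the request
      * and poll the "done" register until the flush completes.
      */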
4935 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4936 {
4937         struct amdgpu_device *adev = ring->adev;
4938         u32 ref_and_mask, reg_mem_engine;
4939         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4940
4941         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4942                 switch (ring->me) {
4943                 case 1:
4944                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4945                         break;
4946                 case 2:
4947                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4948                         break;
4949                 default:
4950                         return;
4951                 }
4952                 reg_mem_engine = 0;
4953         } else {
4954                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4955                 reg_mem_engine = 1; /* pfp */
4956         }
4957
4958         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4959                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4960                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4961                               ref_and_mask, ref_and_mask, 0x20);
4962 }
4963
4964 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4965                                         struct amdgpu_job *job,
4966                                         struct amdgpu_ib *ib,
4967                                         uint32_t flags)
4968 {
4969         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4970         u32 header, control = 0;
4971
4972         if (ib->flags & AMDGPU_IB_FLAG_CE)
4973                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4974         else
4975                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4976
4977         control |= ib->length_dw | (vmid << 24);
4978
4979         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4980                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4981
4982                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4983                         gfx_v9_0_ring_emit_de_meta(ring);
4984         }
4985
4986         amdgpu_ring_write(ring, header);
4987         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4988         amdgpu_ring_write(ring,
4989 #ifdef __BIG_ENDIAN
4990                 (2 << 0) |
4991 #endif
4992                 lower_32_bits(ib->gpu_addr));
4993         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4994         amdgpu_ring_write(ring, control);
4995 }
4996
4997 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4998                                           struct amdgpu_job *job,
4999                                           struct amdgpu_ib *ib,
5000                                           uint32_t flags)
5001 {
5002         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5003         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5004
5005         /* Currently there is a high likelihood of a wave ID mismatch between
5006          * ME and GDS, leading to a hw deadlock, because ME generates
5007          * different wave IDs than the GDS expects. This situation happens
5008          * randomly when at least 5 compute pipes use GDS ordered append.
5009          * The wave IDs generated by ME are also wrong after suspend/resume.
5010          * Those are probably bugs somewhere else in the kernel driver.
5011          *
5012          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5013          * GDS to 0 for this ring (me/pipe).
5014          */
5015         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5016                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5017                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5018                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5019         }
5020
5021         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5022         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5023         amdgpu_ring_write(ring,
5024 #ifdef __BIG_ENDIAN
5025                                 (2 << 0) |
5026 #endif
5027                                 lower_32_bits(ib->gpu_addr));
5028         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5029         amdgpu_ring_write(ring, control);
5030 }
5031
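     /*
      * Emit an end-of-pipe fence: RELEASE_MEM flushes the GPU caches, writes
      * the 32- or 64-bit seq value to "addr" and optionally raises an
      * interrupt, depending on the fence flags.
      */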
5032 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5033                                      u64 seq, unsigned flags)
5034 {
5035         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5036         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5037         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5038
5039         /* RELEASE_MEM - flush caches, send int */
5040         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5041         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5042                                                EOP_TC_NC_ACTION_EN) :
5043                                               (EOP_TCL1_ACTION_EN |
5044                                                EOP_TC_ACTION_EN |
5045                                                EOP_TC_WB_ACTION_EN |
5046                                                EOP_TC_MD_ACTION_EN)) |
5047                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5048                                  EVENT_INDEX(5)));
5049         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5050
5051         /*
5052          * the address should be Qword aligned for a 64bit write, Dword
5053          * aligned when only the low 32 bits of data are sent (data high discarded)
5054          */
5055         if (write64bit)
5056                 BUG_ON(addr & 0x7);
5057         else
5058                 BUG_ON(addr & 0x3);
5059         amdgpu_ring_write(ring, lower_32_bits(addr));
5060         amdgpu_ring_write(ring, upper_32_bits(addr));
5061         amdgpu_ring_write(ring, lower_32_bits(seq));
5062         amdgpu_ring_write(ring, upper_32_bits(seq));
5063         amdgpu_ring_write(ring, 0);
5064 }
5065
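     /*
      * Pipeline sync: poll the ring's own fence location in memory until the
      * last emitted seq is signalled; gfx rings issue the wait from the PFP,
      * compute rings from the ME.
      */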
5066 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5067 {
5068         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5069         uint32_t seq = ring->fence_drv.sync_seq;
5070         uint64_t addr = ring->fence_drv.gpu_addr;
5071
5072         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5073                               lower_32_bits(addr), upper_32_bits(addr),
5074                               seq, 0xffffffff, 4);
5075 }
5076
5077 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5078                                         unsigned vmid, uint64_t pd_addr)
5079 {
5080         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5081
5082         /* compute doesn't have PFP */
5083         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5084                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5085                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5086                 amdgpu_ring_write(ring, 0x0);
5087         }
5088 }
5089
5090 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5091 {
5092         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5093 }
5094
5095 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5096 {
5097         u64 wptr;
5098
5099         /* XXX check if swapping is necessary on BE */
5100         if (ring->use_doorbell)
5101                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5102         else
5103                 BUG();
5104         return wptr;
5105 }
5106
5107 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5108                                            bool acquire)
5109 {
5110         struct amdgpu_device *adev = ring->adev;
5111         int pipe_num, tmp, reg;
5112         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5113
5114         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5115
5116         /* first me only has 2 entries, GFX and HP3D */
5117         if (ring->me > 0)
5118                 pipe_num -= 2;
5119
5120         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5121         tmp = RREG32(reg);
5122         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5123         WREG32(reg, tmp);
5124 }
5125
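     /*
      * Track high-priority pipe reservations in pipe_reserve_bitmap: while at
      * least one reservation is held, throttle the SPI pipe percentage of
      * every pipe that does not hold one; when the bitmap empties, restore
      * all gfx and compute pipes to full percentage.
      */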
5126 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5127                                             struct amdgpu_ring *ring,
5128                                             bool acquire)
5129 {
5130         int i, pipe;
5131         bool reserve;
5132         struct amdgpu_ring *iring;
5133
5134         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5135         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5136         if (acquire)
5137                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5138         else
5139                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5140
5141         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5142                 /* Clear all reservations - everyone reacquires all resources */
5143                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5144                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5145                                                        true);
5146
5147                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5148                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5149                                                        true);
5150         } else {
5151                 /* Lower all pipes without a current reservation */
5152                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5153                         iring = &adev->gfx.gfx_ring[i];
5154                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5155                                                            iring->me,
5156                                                            iring->pipe,
5157                                                            0);
5158                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5159                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5160                 }
5161
5162                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5163                         iring = &adev->gfx.compute_ring[i];
5164                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5165                                                            iring->me,
5166                                                            iring->pipe,
5167                                                            0);
5168                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5169                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5170                 }
5171         }
5172
5173         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5174 }
5175
5176 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5177                                       struct amdgpu_ring *ring,
5178                                       bool acquire)
5179 {
5180         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5181         uint32_t queue_priority = acquire ? 0xf : 0x0;
5182
5183         mutex_lock(&adev->srbm_mutex);
5184         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5185
5186         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5187         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5188
5189         soc15_grbm_select(adev, 0, 0, 0, 0);
5190         mutex_unlock(&adev->srbm_mutex);
5191 }
5192
5193 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5194                                                enum drm_sched_priority priority)
5195 {
5196         struct amdgpu_device *adev = ring->adev;
5197         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5198
5199         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5200                 return;
5201
5202         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5203         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5204 }
5205
5206 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5207 {
5208         struct amdgpu_device *adev = ring->adev;
5209
5210         /* XXX check if swapping is necessary on BE */
5211         if (ring->use_doorbell) {
5212                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5213                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5214         } else {
5215                 BUG(); /* only DOORBELL method supported on gfx9 now */
5216         }
5217 }
5218
5219 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5220                                          u64 seq, unsigned int flags)
5221 {
5222         struct amdgpu_device *adev = ring->adev;
5223
5224         /* we only allocate 32bit for each seq wb address */
5225         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5226
5227         /* write fence seq to the "addr" */
5228         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5229         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5230                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5231         amdgpu_ring_write(ring, lower_32_bits(addr));
5232         amdgpu_ring_write(ring, upper_32_bits(addr));
5233         amdgpu_ring_write(ring, lower_32_bits(seq));
5234
5235         if (flags & AMDGPU_FENCE_FLAG_INT) {
5236                 /* set register to trigger INT */
5237                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5238                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5239                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5240                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5241                 amdgpu_ring_write(ring, 0);
5242                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5243         }
5244 }
5245
5246 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5247 {
5248         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5249         amdgpu_ring_write(ring, 0);
5250 }
5251
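     /*
      * Write a zero-initialized CE metadata payload into the CSA via
      * WRITE_DATA; emitted for SR-IOV so CE state can be restored around
      * preemption.
      */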
5252 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5253 {
5254         struct v9_ce_ib_state ce_payload = {0};
5255         uint64_t csa_addr;
5256         int cnt;
5257
5258         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5259         csa_addr = amdgpu_csa_vaddr(ring->adev);
5260
5261         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5262         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5263                                  WRITE_DATA_DST_SEL(8) |
5264                                  WR_CONFIRM) |
5265                                  WRITE_DATA_CACHE_POLICY(0));
5266         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5267         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5268         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5269 }
5270
5271 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5272 {
5273         struct v9_de_ib_state de_payload = {0};
5274         uint64_t csa_addr, gds_addr;
5275         int cnt;
5276
5277         csa_addr = amdgpu_csa_vaddr(ring->adev);
5278         gds_addr = csa_addr + 4096;
5279         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5280         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5281
5282         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5283         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5284         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5285                                  WRITE_DATA_DST_SEL(8) |
5286                                  WR_CONFIRM) |
5287                                  WRITE_DATA_CACHE_POLICY(0));
5288         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5289         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5290         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5291 }
5292
5293 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5294 {
5295         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5296         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_begin, 1: frame_end */
5297 }
5298
5299 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5300 {
5301         uint32_t dw2 = 0;
5302
5303         if (amdgpu_sriov_vf(ring->adev))
5304                 gfx_v9_0_ring_emit_ce_meta(ring);
5305
5306         gfx_v9_0_ring_emit_tmz(ring, true);
5307
5308         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5309         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5310                 /* set load_global_config & load_global_uconfig */
5311                 dw2 |= 0x8001;
5312                 /* set load_cs_sh_regs */
5313                 dw2 |= 0x01000000;
5314                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5315                 dw2 |= 0x10002;
5316
5317                 /* set load_ce_ram if a preamble is present */
5318                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5319                         dw2 |= 0x10000000;
5320         } else {
5321                 /* still load_ce_ram if this is the first time the preamble is
5322                  * presented, even though no context switch happens.
5323                  */
5324                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5325                         dw2 |= 0x10000000;
5326         }
5327
5328         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5329         amdgpu_ring_write(ring, dw2);
5330         amdgpu_ring_write(ring, 0);
5331 }
5332
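     /*
      * Emit a COND_EXEC packet with a dummy DW count and return the ring
      * offset of that count; gfx_v9_0_ring_emit_patch_cond_exec() later
      * patches in the real number of DWs to skip when *cond_exe_gpu_addr
      * is zero.
      */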
5333 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5334 {
5335         unsigned ret;
5336         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5337         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5338         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5339         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5340         ret = ring->wptr & ring->buf_mask;
5341         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5342         return ret;
5343 }
5344
5345 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5346 {
5347         unsigned cur;
5348         BUG_ON(offset > ring->buf_mask);
5349         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5350
5351         cur = (ring->wptr & ring->buf_mask) - 1;
5352         if (likely(cur > offset))
5353                 ring->ring[offset] = cur - offset;
5354         else
5355                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5356 }
5357
5358 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5359 {
5360         struct amdgpu_device *adev = ring->adev;
5361
5362         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5363         amdgpu_ring_write(ring, 0 |     /* src: register */
5364                                 (5 << 8) |      /* dst: memory */
5365                                 (1 << 20));     /* write confirm */
5366         amdgpu_ring_write(ring, reg);
5367         amdgpu_ring_write(ring, 0);
5368         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5369                                 adev->virt.reg_val_offs * 4));
5370         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5371                                 adev->virt.reg_val_offs * 4));
5372 }
5373
5374 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5375                                     uint32_t val)
5376 {
5377         uint32_t cmd = 0;
5378
5379         switch (ring->funcs->type) {
5380         case AMDGPU_RING_TYPE_GFX:
5381                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5382                 break;
5383         case AMDGPU_RING_TYPE_KIQ:
5384                 cmd = (1 << 16); /* no inc addr */
5385                 break;
5386         default:
5387                 cmd = WR_CONFIRM;
5388                 break;
5389         }
5390         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5391         amdgpu_ring_write(ring, cmd);
5392         amdgpu_ring_write(ring, reg);
5393         amdgpu_ring_write(ring, 0);
5394         amdgpu_ring_write(ring, val);
5395 }
5396
5397 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5398                                         uint32_t val, uint32_t mask)
5399 {
5400         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5401 }
5402
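     /*
      * Write reg0 and wait on reg1 in a single WAIT_REG_MEM packet when the
      * CP firmware supports it; otherwise fall back to the generic
      * write-then-wait helper.
      */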
5403 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5404                                                   uint32_t reg0, uint32_t reg1,
5405                                                   uint32_t ref, uint32_t mask)
5406 {
5407         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5408         struct amdgpu_device *adev = ring->adev;
5409         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5410                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5411
5412         if (fw_version_ok)
5413                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5414                                       ref, mask, 0x20);
5415         else
5416                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5417                                                            ref, mask);
5418 }
5419
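     /*
      * Soft recovery: program SQ_CMD to act on the waves of the hung VMID
      * instead of performing a full GPU reset.
      */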
5420 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5421 {
5422         struct amdgpu_device *adev = ring->adev;
5423         uint32_t value = 0;
5424
5425         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5426         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5427         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5428         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5429         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5430 }
5431
5432 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5433                                                  enum amdgpu_interrupt_state state)
5434 {
5435         switch (state) {
5436         case AMDGPU_IRQ_STATE_DISABLE:
5437         case AMDGPU_IRQ_STATE_ENABLE:
5438                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5439                                TIME_STAMP_INT_ENABLE,
5440                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5441                 break;
5442         default:
5443                 break;
5444         }
5445 }
5446
5447 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5448                                                      int me, int pipe,
5449                                                      enum amdgpu_interrupt_state state)
5450 {
5451         u32 mec_int_cntl, mec_int_cntl_reg;
5452
5453         /*
5454          * amdgpu controls only the first MEC. That's why this function only
5455          * handles the setting of interrupts for this specific MEC. All other
5456          * pipes' interrupts are set by amdkfd.
5457          */
5458
5459         if (me == 1) {
5460                 switch (pipe) {
5461                 case 0:
5462                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5463                         break;
5464                 case 1:
5465                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5466                         break;
5467                 case 2:
5468                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5469                         break;
5470                 case 3:
5471                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5472                         break;
5473                 default:
5474                         DRM_DEBUG("invalid pipe %d\n", pipe);
5475                         return;
5476                 }
5477         } else {
5478                 DRM_DEBUG("invalid me %d\n", me);
5479                 return;
5480         }
5481
5482         switch (state) {
5483         case AMDGPU_IRQ_STATE_DISABLE:
5484                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5485                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5486                                              TIME_STAMP_INT_ENABLE, 0);
5487                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5488                 break;
5489         case AMDGPU_IRQ_STATE_ENABLE:
5490                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5491                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5492                                              TIME_STAMP_INT_ENABLE, 1);
5493                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5494                 break;
5495         default:
5496                 break;
5497         }
5498 }
5499
5500 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5501                                              struct amdgpu_irq_src *source,
5502                                              unsigned type,
5503                                              enum amdgpu_interrupt_state state)
5504 {
5505         switch (state) {
5506         case AMDGPU_IRQ_STATE_DISABLE:
5507         case AMDGPU_IRQ_STATE_ENABLE:
5508                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5509                                PRIV_REG_INT_ENABLE,
5510                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5511                 break;
5512         default:
5513                 break;
5514         }
5515
5516         return 0;
5517 }
5518
5519 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5520                                               struct amdgpu_irq_src *source,
5521                                               unsigned type,
5522                                               enum amdgpu_interrupt_state state)
5523 {
5524         switch (state) {
5525         case AMDGPU_IRQ_STATE_DISABLE:
5526         case AMDGPU_IRQ_STATE_ENABLE:
5527                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5528                                PRIV_INSTR_INT_ENABLE,
5529                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
5530         default:
5531                 break;
5532         }
5533
5534         return 0;
5535 }
5536
5537 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5538         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5539                         CP_ECC_ERROR_INT_ENABLE, 1)
5540
5541 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5542         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5543                         CP_ECC_ERROR_INT_ENABLE, 0)
5544
5545 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5546                                               struct amdgpu_irq_src *source,
5547                                               unsigned type,
5548                                               enum amdgpu_interrupt_state state)
5549 {
5550         switch (state) {
5551         case AMDGPU_IRQ_STATE_DISABLE:
5552                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5553                                 CP_ECC_ERROR_INT_ENABLE, 0);
5554                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5555                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5556                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5557                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5558                 break;
5559
5560         case AMDGPU_IRQ_STATE_ENABLE:
5561                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5562                                 CP_ECC_ERROR_INT_ENABLE, 1);
5563                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5564                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5565                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5566                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5567                 break;
5568         default:
5569                 break;
5570         }
5571
5572         return 0;
5573 }
5574
5575
5576 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5577                                             struct amdgpu_irq_src *src,
5578                                             unsigned type,
5579                                             enum amdgpu_interrupt_state state)
5580 {
5581         switch (type) {
5582         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5583                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5584                 break;
5585         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5586                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5587                 break;
5588         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5589                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5590                 break;
5591         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5592                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5593                 break;
5594         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5595                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5596                 break;
5597         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5598                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5599                 break;
5600         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5601                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5602                 break;
5603         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5604                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5605                 break;
5606         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5607                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5608                 break;
5609         default:
5610                 break;
5611         }
5612         return 0;
5613 }
5614
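     /*
      * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
      * run fence processing on the matching gfx or compute ring.
      */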
5615 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5616                             struct amdgpu_irq_src *source,
5617                             struct amdgpu_iv_entry *entry)
5618 {
5619         int i;
5620         u8 me_id, pipe_id, queue_id;
5621         struct amdgpu_ring *ring;
5622
5623         DRM_DEBUG("IH: CP EOP\n");
5624         me_id = (entry->ring_id & 0x0c) >> 2;
5625         pipe_id = (entry->ring_id & 0x03) >> 0;
5626         queue_id = (entry->ring_id & 0x70) >> 4;
5627
5628         switch (me_id) {
5629         case 0:
5630                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5631                 break;
5632         case 1:
5633         case 2:
5634                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5635                         ring = &adev->gfx.compute_ring[i];
5636                         /* Per-queue interrupt is supported for MEC starting from VI.
5637                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5638                           */
5639                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5640                                 amdgpu_fence_process(ring);
5641                 }
5642                 break;
5643         }
5644         return 0;
5645 }
5646
5647 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5648                            struct amdgpu_iv_entry *entry)
5649 {
5650         u8 me_id, pipe_id, queue_id;
5651         struct amdgpu_ring *ring;
5652         int i;
5653
5654         me_id = (entry->ring_id & 0x0c) >> 2;
5655         pipe_id = (entry->ring_id & 0x03) >> 0;
5656         queue_id = (entry->ring_id & 0x70) >> 4;
5657
5658         switch (me_id) {
5659         case 0:
5660                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5661                 break;
5662         case 1:
5663         case 2:
5664                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5665                         ring = &adev->gfx.compute_ring[i];
5666                         if (ring->me == me_id && ring->pipe == pipe_id &&
5667                             ring->queue == queue_id)
5668                                 drm_sched_fault(&ring->sched);
5669                 }
5670                 break;
5671         }
5672 }
5673
5674 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5675                                  struct amdgpu_irq_src *source,
5676                                  struct amdgpu_iv_entry *entry)
5677 {
5678         DRM_ERROR("Illegal register access in command stream\n");
5679         gfx_v9_0_fault(adev, entry);
5680         return 0;
5681 }
5682
5683 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5684                                   struct amdgpu_irq_src *source,
5685                                   struct amdgpu_iv_entry *entry)
5686 {
5687         DRM_ERROR("Illegal instruction in command stream\n");
5688         gfx_v9_0_fault(adev, entry);
5689         return 0;
5690 }
5691
5692 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5693                 struct ras_err_data *err_data,
5694                 struct amdgpu_iv_entry *entry)
5695 {
5696         /* TODO ue will trigger an interrupt. */
5697         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5698         if (adev->gfx.funcs->query_ras_error_count)
5699                 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5700         amdgpu_ras_reset_gpu(adev, 0);
5701         return AMDGPU_RAS_SUCCESS;
5702 }
5703
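     /*
      * Per-block EDC counter registers: each entry names the block, its
      * counter register, whether it is instanced per SE, how many instances
      * to read and the SEC/DED field masks used when tallying RAS errors.
      */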
5704 static const struct {
5705         const char *name;
5706         uint32_t ip;
5707         uint32_t inst;
5708         uint32_t seg;
5709         uint32_t reg_offset;
5710         uint32_t per_se_instance;
5711         int32_t num_instance;
5712         uint32_t sec_count_mask;
5713         uint32_t ded_count_mask;
5714 } gfx_ras_edc_regs[] = {
5715         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5716           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5717           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5718         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5719           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5720           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5721         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5722           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5723         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5724           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5725         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5726           REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5727           REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5728         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5729           REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5730         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5731           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5732           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5733         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5734           REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5735           REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5736         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5737           REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5738         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5739           REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5740         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5741           REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5742         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5743           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5744           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5745         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5746           REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5747         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5748           0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5749           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5750         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5751           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5752           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5753           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5754         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5755           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5756           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5757         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5758           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5759           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5760           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5761         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5762           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5763           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5764           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5765         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5766           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5767           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5768           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5769         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5770           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5771           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5772           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5773         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5774           REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5775         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5776           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5777           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5778         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5779           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5780         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5781           REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5782         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5783           REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5784         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5785           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5786         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5787           REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5788         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5789           REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5790         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5791           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5792           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5793         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5794           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5795           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5796         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5797           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5798           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5799         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5800           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5801           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5802         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5803           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5804           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5805         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5806           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5807         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5808           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5809         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5810           REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5811         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5812           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5813         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5814           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5815         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5816           REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5817         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5818           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5819         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5820           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5821         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5822           16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5823         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5824           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5825           0 },
5826         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5827           16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5828         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5829           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5830           0 },
5831         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5832           16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5833         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5834           REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5835         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5836           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5837           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5838         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5839           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5840           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5841         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5842           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5843         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5844           REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5845         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5846           REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5847         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5848           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5849           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5850         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5851           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5852           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5853         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5854           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5855           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5856         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5857           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5858           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5859         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5860           REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5861         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5862           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5863           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5864         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5865           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5866           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5867         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5868           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5869           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5870         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5871           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5872           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5873         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5874           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5875           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5876         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5877           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5878           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5879         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5880           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5881           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5882         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5883           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5884           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5885         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5886           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5887           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5888         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5889           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5890           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5891         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5892           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5893           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5894         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5895           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5896           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5897         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5898           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5899           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5900         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5901           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5902           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5903         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5904           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5905           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5906         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5907           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5908           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5909         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5910           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5911           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5912         { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5913           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5914           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5915           0 },
5916         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5917           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5918         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5919           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5920         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5921           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5922         { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5923           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5924           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5925         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5926           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5927           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5928         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5929           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5930           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5931         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5932           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5933           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5934         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5935           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5936           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5937         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5938           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5939           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5940         { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5941           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5942           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5943           0 },
5944         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5945           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5946         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5947           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5948         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5949           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5950         { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5951           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5952           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5953         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5954           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5955           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5956         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5957           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5958           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5959         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5960           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5961           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5962         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5963           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5964           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5965         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5966           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5967           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5968         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5969           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5970         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5971           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5972         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5973           REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5974         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5975           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5976         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5977           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5978         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5979           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5980           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5981         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5982           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5983           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5984         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5985           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5986           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5987         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5988           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5989         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5990           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5991         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5992           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5993         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5994           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5995         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5996           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5997         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5998           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
5999 };
6000
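/*
 * gfx_v9_0_ras_error_inject - inject a GFX RAS error through the PSP RAS TA
 *
 * Validates the requested sub-block against ras_gfx_subblocks and against the
 * hardware/software supported error types, then fills a
 * ta_ras_trigger_error_input and hands it to psp_ras_trigger_error() while
 * holding grbm_idx_mutex. Only supported on Vega20.
 */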
6001 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6002                                      void *inject_if)
6003 {
6004         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6005         int ret;
6006         struct ta_ras_trigger_error_input block_info = { 0 };
6007
6008         if (adev->asic_type != CHIP_VEGA20)
6009                 return -EINVAL;
6010
6011         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6012                 return -EINVAL;
6013
6014         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6015                 return -EPERM;
6016
6017         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6018               info->head.type)) {
6019                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6020                         ras_gfx_subblocks[info->head.sub_block_index].name,
6021                         info->head.type);
6022                 return -EPERM;
6023         }
6024
6025         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6026               info->head.type)) {
6027                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6028                         ras_gfx_subblocks[info->head.sub_block_index].name,
6029                         info->head.type);
6030                 return -EPERM;
6031         }
6032
6033         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6034         block_info.sub_block_index =
6035                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6036         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6037         block_info.address = info->address;
6038         block_info.value = info->value;
6039
6040         mutex_lock(&adev->grbm_idx_mutex);
6041         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6042         mutex_unlock(&adev->grbm_idx_mutex);
6043
6044         return ret;
6045 }
6046
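/*
 * gfx_v9_0_query_ras_error_count - read the GFX EDC error counters
 *
 * For every shader engine and instance, walks gfx_ras_edc_regs, selects the
 * SE/instance with gfx_v9_0_select_se_sh(), reads the EDC counter register
 * and extracts the SEC and DED fields. Each sub-block reporting a non-zero
 * SEC count bumps err_data->ce_count, and a non-zero DED count bumps
 * err_data->ue_count. Broadcast selection is restored before returning.
 * Only supported on Vega20.
 */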
6047 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6048                                           void *ras_error_status)
6049 {
6050         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6051         uint32_t sec_count, ded_count;
6052         uint32_t i;
6053         uint32_t reg_value;
6054         uint32_t se_id, instance_id;
6055
6056         if (adev->asic_type != CHIP_VEGA20)
6057                 return -EINVAL;
6058
6059         err_data->ue_count = 0;
6060         err_data->ce_count = 0;
6061
6062         mutex_lock(&adev->grbm_idx_mutex);
6063         for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6064                 for (instance_id = 0; instance_id < 256; instance_id++) {
6065                         for (i = 0;
6066                              i < ARRAY_SIZE(gfx_ras_edc_regs);
6067                              i++) {
6068                                 if (se_id != 0 &&
6069                                     !gfx_ras_edc_regs[i].per_se_instance)
6070                                         continue;
6071                                 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6072                                         continue;
6073
6074                                 gfx_v9_0_select_se_sh(adev, se_id, 0,
6075                                                       instance_id);
6076
6077                                 reg_value = RREG32(
6078                                         adev->reg_offset[gfx_ras_edc_regs[i].ip]
6079                                                         [gfx_ras_edc_regs[i].inst]
6080                                                         [gfx_ras_edc_regs[i].seg] +
6081                                         gfx_ras_edc_regs[i].reg_offset);
6082                                 sec_count = reg_value &
6083                                             gfx_ras_edc_regs[i].sec_count_mask;
6084                                 ded_count = reg_value &
6085                                             gfx_ras_edc_regs[i].ded_count_mask;
6086                                 if (sec_count) {
6087                                         DRM_INFO(
6088                                                 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6089                                                 se_id, instance_id,
6090                                                 gfx_ras_edc_regs[i].name,
6091                                                 sec_count);
6092                                         err_data->ce_count++;
6093                                 }
6094
6095                                 if (ded_count) {
6096                                         DRM_INFO(
6097                                                 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6098                                                 se_id, instance_id,
6099                                                 gfx_ras_edc_regs[i].name,
6100                                                 ded_count);
6101                                         err_data->ue_count++;
6102                                 }
6103                         }
6104                 }
6105         }
6106         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6107         mutex_unlock(&adev->grbm_idx_mutex);
6108
6109         return 0;
6110 }
6111
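/*
 * gfx_v9_0_cp_ecc_error_irq - CP ECC error interrupt handler
 *
 * Forwards the IV entry to the RAS core via amdgpu_ras_interrupt_dispatch(),
 * provided a RAS interface has been registered for the GFX block.
 */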
6112 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6113                                   struct amdgpu_irq_src *source,
6114                                   struct amdgpu_iv_entry *entry)
6115 {
6116         struct ras_common_if *ras_if = adev->gfx.ras_if;
6117         struct ras_dispatch_if ih_data = {
6118                 .entry = entry,
6119         };
6120
6121         if (!ras_if)
6122                 return 0;
6123
6124         ih_data.head = *ras_if;
6125
6126         DRM_ERROR("CP ECC ERROR IRQ\n");
6127         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6128         return 0;
6129 }
6130
6131 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6132         .name = "gfx_v9_0",
6133         .early_init = gfx_v9_0_early_init,
6134         .late_init = gfx_v9_0_late_init,
6135         .sw_init = gfx_v9_0_sw_init,
6136         .sw_fini = gfx_v9_0_sw_fini,
6137         .hw_init = gfx_v9_0_hw_init,
6138         .hw_fini = gfx_v9_0_hw_fini,
6139         .suspend = gfx_v9_0_suspend,
6140         .resume = gfx_v9_0_resume,
6141         .is_idle = gfx_v9_0_is_idle,
6142         .wait_for_idle = gfx_v9_0_wait_for_idle,
6143         .soft_reset = gfx_v9_0_soft_reset,
6144         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6145         .set_powergating_state = gfx_v9_0_set_powergating_state,
6146         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6147 };
6148
6149 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6150         .type = AMDGPU_RING_TYPE_GFX,
6151         .align_mask = 0xff,
6152         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6153         .support_64bit_ptrs = true,
6154         .vmhub = AMDGPU_GFXHUB_0,
6155         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6156         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6157         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6158         .emit_frame_size = /* 242 maximum in total, if 16 IBs */
6159                 5 +  /* COND_EXEC */
6160                 7 +  /* PIPELINE_SYNC */
6161                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6162                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6163                 2 + /* VM_FLUSH */
6164                 8 +  /* FENCE for VM_FLUSH */
6165                 20 + /* GDS switch */
6166                 4 + /* double SWITCH_BUFFER,
6167                        the first COND_EXEC jumps to the place just
6168                        prior to this double SWITCH_BUFFER */
6169                 5 + /* COND_EXEC */
6170                 7 +      /*     HDP_flush */
6171                 4 +      /*     VGT_flush */
6172                 14 + /* CE_META */
6173                 31 + /* DE_META */
6174                 3 + /* CNTX_CTRL */
6175                 5 + /* HDP_INVL */
6176                 8 + 8 + /* FENCE x2 */
6177                 2, /* SWITCH_BUFFER */
6178         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6179         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6180         .emit_fence = gfx_v9_0_ring_emit_fence,
6181         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6182         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6183         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6184         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6185         .test_ring = gfx_v9_0_ring_test_ring,
6186         .test_ib = gfx_v9_0_ring_test_ib,
6187         .insert_nop = amdgpu_ring_insert_nop,
6188         .pad_ib = amdgpu_ring_generic_pad_ib,
6189         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6190         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6191         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6192         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6193         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6194         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6195         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6196         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6197         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6198 };
6199
6200 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6201         .type = AMDGPU_RING_TYPE_COMPUTE,
6202         .align_mask = 0xff,
6203         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6204         .support_64bit_ptrs = true,
6205         .vmhub = AMDGPU_GFXHUB_0,
6206         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6207         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6208         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6209         .emit_frame_size =
6210                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6211                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6212                 5 + /* hdp invalidate */
6213                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6214                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6215                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6216                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6217                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6218         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6219         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6220         .emit_fence = gfx_v9_0_ring_emit_fence,
6221         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6222         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6223         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6224         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6225         .test_ring = gfx_v9_0_ring_test_ring,
6226         .test_ib = gfx_v9_0_ring_test_ib,
6227         .insert_nop = amdgpu_ring_insert_nop,
6228         .pad_ib = amdgpu_ring_generic_pad_ib,
6229         .set_priority = gfx_v9_0_ring_set_priority_compute,
6230         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6231         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6232         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6233 };
6234
6235 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6236         .type = AMDGPU_RING_TYPE_KIQ,
6237         .align_mask = 0xff,
6238         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6239         .support_64bit_ptrs = true,
6240         .vmhub = AMDGPU_GFXHUB_0,
6241         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6242         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6243         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6244         .emit_frame_size =
6245                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6246                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6247                 5 + /* hdp invalidate */
6248                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6249                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6250                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6251                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6252                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6253         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6254         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6255         .test_ring = gfx_v9_0_ring_test_ring,
6256         .insert_nop = amdgpu_ring_insert_nop,
6257         .pad_ib = amdgpu_ring_generic_pad_ib,
6258         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6259         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6260         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6261         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6262 };
6263
6264 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6265 {
6266         int i;
6267
6268         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6269
6270         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6271                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6272
6273         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6274                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6275 }
6276
6277 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6278         .set = gfx_v9_0_set_eop_interrupt_state,
6279         .process = gfx_v9_0_eop_irq,
6280 };
6281
6282 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6283         .set = gfx_v9_0_set_priv_reg_fault_state,
6284         .process = gfx_v9_0_priv_reg_irq,
6285 };
6286
6287 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6288         .set = gfx_v9_0_set_priv_inst_fault_state,
6289         .process = gfx_v9_0_priv_inst_irq,
6290 };
6291
6292 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6293         .set = gfx_v9_0_set_cp_ecc_error_state,
6294         .process = gfx_v9_0_cp_ecc_error_irq,
6295 };
6296
6297
6298 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6299 {
6300         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6301         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6302
6303         adev->gfx.priv_reg_irq.num_types = 1;
6304         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6305
6306         adev->gfx.priv_inst_irq.num_types = 1;
6307         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6308
6309         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6310         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6311 }
6312
6313 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6314 {
6315         switch (adev->asic_type) {
6316         case CHIP_VEGA10:
6317         case CHIP_VEGA12:
6318         case CHIP_VEGA20:
6319         case CHIP_RAVEN:
6320         case CHIP_ARCTURUS:
6321                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6322                 break;
6323         default:
6324                 break;
6325         }
6326 }
6327
6328 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6329 {
6330         /* init asic gds info */
6331         switch (adev->asic_type) {
6332         case CHIP_VEGA10:
6333         case CHIP_VEGA12:
6334         case CHIP_VEGA20:
6335                 adev->gds.gds_size = 0x10000;
6336                 break;
6337         case CHIP_RAVEN:
6338         case CHIP_ARCTURUS:
6339                 adev->gds.gds_size = 0x1000;
6340                 break;
6341         default:
6342                 adev->gds.gds_size = 0x10000;
6343                 break;
6344         }
6345
6346         switch (adev->asic_type) {
6347         case CHIP_VEGA10:
6348         case CHIP_VEGA20:
6349                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6350                 break;
6351         case CHIP_VEGA12:
6352                 adev->gds.gds_compute_max_wave_id = 0x27f;
6353                 break;
6354         case CHIP_RAVEN:
6355                 if (adev->rev_id >= 0x8)
6356                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6357                 else
6358                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6359                 break;
6360         case CHIP_ARCTURUS:
6361                 adev->gds.gds_compute_max_wave_id = 0xfff;
6362                 break;
6363         default:
6364                 /* this really depends on the chip */
6365                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6366                 break;
6367         }
6368
6369         adev->gds.gws_size = 64;
6370         adev->gds.oa_size = 16;
6371 }
6372
6373 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6374                                                  u32 bitmap)
6375 {
6376         u32 data;
6377
6378         if (!bitmap)
6379                 return;
6380
6381         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6382         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6383
6384         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6385 }
6386
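/*
 * gfx_v9_0_get_cu_active_bitmap - active CU mask for the selected SE/SH
 *
 * ORs the fuse (CC) and user (GC_USER) inactive-CU fields, inverts the result
 * and masks it down to max_cu_per_sh, giving a bitmap of the CUs that are
 * actually usable in the currently selected shader array.
 */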
6387 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6388 {
6389         u32 data, mask;
6390
6391         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6392         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6393
6394         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6395         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6396
6397         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6398
6399         return (~data) & mask;
6400 }
6401
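/*
 * gfx_v9_0_get_cu_info - gather per-SE/SH CU bitmaps and counts
 *
 * Applies any user-requested CU disable masks, reads the active-CU bitmap for
 * every SE/SH pair, fills cu_info->bitmap and cu_info->ao_cu_bitmap, and
 * totals the number of active CUs. Only the first 2x2 SE/SH block contributes
 * to ao_cu_mask.
 */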
6402 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6403                                  struct amdgpu_cu_info *cu_info)
6404 {
6405         int i, j, k, counter, active_cu_number = 0;
6406         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6407         unsigned disable_masks[4 * 4];
6408
6409         if (!adev || !cu_info)
6410                 return -EINVAL;
6411
6412         /*
6413          * 16 comes from the bitmap array size 4*4, which is enough to cover all gfx9 ASICs
6414          */
6415         if (adev->gfx.config.max_shader_engines *
6416                 adev->gfx.config.max_sh_per_se > 16)
6417                 return -EINVAL;
6418
6419         amdgpu_gfx_parse_disable_cu(disable_masks,
6420                                     adev->gfx.config.max_shader_engines,
6421                                     adev->gfx.config.max_sh_per_se);
6422
6423         mutex_lock(&adev->grbm_idx_mutex);
6424         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6425                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6426                         mask = 1;
6427                         ao_bitmap = 0;
6428                         counter = 0;
6429                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6430                         gfx_v9_0_set_user_cu_inactive_bitmap(
6431                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6432                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6433
6434                         /*
6435                          * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
6436                          * 4x4 array, which suits Vega ASICs with their 4*2 SE/SH
6437                          * layout.
6438                          * Arcturus, however, changes the SE/SH layout to 8*1.
6439                          * To minimize the impact, we map it onto the existing 4x4
6440                          * bitmap array as below:
6441                          *    SE4,SH0 --> bitmap[0][1]
6442                          *    SE5,SH0 --> bitmap[1][1]
6443                          *    SE6,SH0 --> bitmap[2][1]
6444                          *    SE7,SH0 --> bitmap[3][1]
6445                          */
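                        /*
                         * e.g. SE5/SH0: i = 5, j = 0, so the bitmap lands in
                         * bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1].
                         */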
6446                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6447
6448                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6449                                 if (bitmap & mask) {
6450                                         if (counter < adev->gfx.config.max_cu_per_sh)
6451                                                 ao_bitmap |= mask;
6452                                         counter++;
6453                                 }
6454                                 mask <<= 1;
6455                         }
6456                         active_cu_number += counter;
6457                         if (i < 2 && j < 2)
6458                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6459                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6460                 }
6461         }
6462         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6463         mutex_unlock(&adev->grbm_idx_mutex);
6464
6465         cu_info->number = active_cu_number;
6466         cu_info->ao_cu_mask = ao_cu_mask;
6467         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6468
6469         return 0;
6470 }
6471
6472 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6473 {
6474         .type = AMD_IP_BLOCK_TYPE_GFX,
6475         .major = 9,
6476         .minor = 0,
6477         .rev = 0,
6478         .funcs = &gfx_v9_0_ip_funcs,
6479 };