1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "soc15.h"
29 #include "soc15d.h"
30 #include "amdgpu_atomfirmware.h"
31
32 #include "gc/gc_9_0_offset.h"
33 #include "gc/gc_9_0_sh_mask.h"
34 #include "vega10_enum.h"
35 #include "hdp/hdp_4_0_offset.h"
36
37 #include "soc15.h"
38 #include "soc15_common.h"
39 #include "clearstate_gfx9.h"
40 #include "v9_structs.h"
41
42 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
43
44 #include "amdgpu_ras.h"
45
46 #define GFX9_NUM_GFX_RINGS     1
47 #define GFX9_MEC_HPD_SIZE 4096
48 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
49 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
50
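/* PWR_MISC_CNTL_STATUS is not part of the generated GC register headers,
 * so the offset and the RLC CGPG enable / GFXOFF status fields are
 * defined locally here.
 */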
51 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
52 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
53 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
54 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
55 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
56 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
57
58 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
59 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
60 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
61 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
62 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
63 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
64
65 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
66 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
67 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
68 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
69 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
70 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
71
72 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
73 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
74 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
75 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
76 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
77 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
78
79 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
80 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/raven_me.bin");
82 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
83 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
84 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
85
86 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
87 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
88 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
89 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
90 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
93
94 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
95 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
97 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
98 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
100
101 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
102 {
103         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
104         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
105         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
106         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
107         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
108         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
123 };
124
125 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
126 {
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
128         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
129         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
130         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
131         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
132         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
133         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
134         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
135         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
136         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
137         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
138         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
139         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
145 };
146
147 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
148 {
149         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
150         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
151         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
152         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
153         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
154         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
160 };
161
162 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
163 {
164         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
165         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
166         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
167         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
168         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
169         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
175         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
176         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
177         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
178         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
188 };
189
190 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
191 {
192         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
193         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
194         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
195         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
196         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
197         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
199 };
200
201 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
202 {
203         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
204         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
205         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
206         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
219         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
220         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
221         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
222 };
223
224 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
225 {
226         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
227         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
228         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
229 };
230
231 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
232 {
233         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
234         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
235         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
236         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
237         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
238         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
239         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
240         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
241         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
242         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
243         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
244         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
245         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
246         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
247         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
248         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
249 };
250
251 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
252 {
253         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
254         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
255         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
256         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
257         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
258         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
259         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
260         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
261         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
262         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
263         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
264         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
265         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
266 };
267
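/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL address/data registers relative
 * to instance 0, so they can be indexed as a pair of small arrays.
 */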
268 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
269 {
270         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
271         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
272         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
273         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
274         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
275         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
276         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 };
279
280 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
281 {
282         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
283         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
284         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
285         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
286         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
287         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
288         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 };
291
292 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
293 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
294 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
295 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
296
297 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
298 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
299 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
300 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
301 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
302                                  struct amdgpu_cu_info *cu_info);
303 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
304 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
305 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
306
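/*
 * Apply the per-ASIC "golden" register overrides defined above, then the
 * settings common to all GFX9 parts; each entry names a register, a mask
 * and a value programmed via soc15_program_register_sequence().
 */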
307 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
308 {
309         switch (adev->asic_type) {
310         case CHIP_VEGA10:
311                 if (!amdgpu_virt_support_skip_setting(adev)) {
312                         soc15_program_register_sequence(adev,
313                                                          golden_settings_gc_9_0,
314                                                          ARRAY_SIZE(golden_settings_gc_9_0));
315                         soc15_program_register_sequence(adev,
316                                                          golden_settings_gc_9_0_vg10,
317                                                          ARRAY_SIZE(golden_settings_gc_9_0_vg10));
318                 }
319                 break;
320         case CHIP_VEGA12:
321                 soc15_program_register_sequence(adev,
322                                                 golden_settings_gc_9_2_1,
323                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
324                 soc15_program_register_sequence(adev,
325                                                 golden_settings_gc_9_2_1_vg12,
326                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
327                 break;
328         case CHIP_VEGA20:
329                 soc15_program_register_sequence(adev,
330                                                 golden_settings_gc_9_0,
331                                                 ARRAY_SIZE(golden_settings_gc_9_0));
332                 soc15_program_register_sequence(adev,
333                                                 golden_settings_gc_9_0_vg20,
334                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
335                 break;
336         case CHIP_RAVEN:
337                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
338                                                 ARRAY_SIZE(golden_settings_gc_9_1));
339                 if (adev->rev_id >= 8)
340                         soc15_program_register_sequence(adev,
341                                                         golden_settings_gc_9_1_rv2,
342                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
343                 else
344                         soc15_program_register_sequence(adev,
345                                                         golden_settings_gc_9_1_rv1,
346                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
347                 break;
348         default:
349                 break;
350         }
351
352         soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
353                                         (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
354 }
355
356 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
357 {
358         adev->gfx.scratch.num_reg = 8;
359         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
360         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
361 }
362
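/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the
 * selected engine; @wc requests a write confirmation before the packet
 * is considered complete.
 */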
363 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
364                                        bool wc, uint32_t reg, uint32_t val)
365 {
366         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
367         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
368                                 WRITE_DATA_DST_SEL(0) |
369                                 (wc ? WR_CONFIRM : 0));
370         amdgpu_ring_write(ring, reg);
371         amdgpu_ring_write(ring, 0);
372         amdgpu_ring_write(ring, val);
373 }
374
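/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & @mask) == @ref, using
 * @inv as the poll interval.
 */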
375 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
376                                   int mem_space, int opt, uint32_t addr0,
377                                   uint32_t addr1, uint32_t ref, uint32_t mask,
378                                   uint32_t inv)
379 {
380         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
381         amdgpu_ring_write(ring,
382                                  /* memory (1) or register (0) */
383                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
384                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
385                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
386                                  WAIT_REG_MEM_ENGINE(eng_sel)));
387
388         if (mem_space)
389                 BUG_ON(addr0 & 0x3); /* Dword align */
390         amdgpu_ring_write(ring, addr0);
391         amdgpu_ring_write(ring, addr1);
392         amdgpu_ring_write(ring, ref);
393         amdgpu_ring_write(ring, mask);
394         amdgpu_ring_write(ring, inv); /* poll interval */
395 }
396
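/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, ask the ring
 * to write 0xDEADBEEF to it via SET_UCONFIG_REG, then poll until the new
 * value shows up or the timeout expires.
 */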
397 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
398 {
399         struct amdgpu_device *adev = ring->adev;
400         uint32_t scratch;
401         uint32_t tmp = 0;
402         unsigned i;
403         int r;
404
405         r = amdgpu_gfx_scratch_get(adev, &scratch);
406         if (r)
407                 return r;
408
409         WREG32(scratch, 0xCAFEDEAD);
410         r = amdgpu_ring_alloc(ring, 3);
411         if (r)
412                 goto error_free_scratch;
413
414         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
415         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
416         amdgpu_ring_write(ring, 0xDEADBEEF);
417         amdgpu_ring_commit(ring);
418
419         for (i = 0; i < adev->usec_timeout; i++) {
420                 tmp = RREG32(scratch);
421                 if (tmp == 0xDEADBEEF)
422                         break;
423                 DRM_UDELAY(1);
424         }
425
426         if (i >= adev->usec_timeout)
427                 r = -ETIMEDOUT;
428
429 error_free_scratch:
430         amdgpu_gfx_scratch_free(adev, scratch);
431         return r;
432 }
433
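/*
 * IB test: submit a small indirect buffer whose WRITE_DATA packet stores
 * 0xDEADBEEF to a writeback slot, then wait on the fence and verify the
 * value landed in memory.
 */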
434 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
435 {
436         struct amdgpu_device *adev = ring->adev;
437         struct amdgpu_ib ib;
438         struct dma_fence *f = NULL;
439
440         unsigned index;
441         uint64_t gpu_addr;
442         uint32_t tmp;
443         long r;
444
445         r = amdgpu_device_wb_get(adev, &index);
446         if (r)
447                 return r;
448
449         gpu_addr = adev->wb.gpu_addr + (index * 4);
450         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
451         memset(&ib, 0, sizeof(ib));
452         r = amdgpu_ib_get(adev, NULL, 16, &ib);
453         if (r)
454                 goto err1;
455
456         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
457         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
458         ib.ptr[2] = lower_32_bits(gpu_addr);
459         ib.ptr[3] = upper_32_bits(gpu_addr);
460         ib.ptr[4] = 0xDEADBEEF;
461         ib.length_dw = 5;
462
463         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
464         if (r)
465                 goto err2;
466
467         r = dma_fence_wait_timeout(f, false, timeout);
468         if (r == 0) {
469                 r = -ETIMEDOUT;
470                 goto err2;
471         } else if (r < 0) {
472                 goto err2;
473         }
474
475         tmp = adev->wb.wb[index];
476         if (tmp == 0xDEADBEEF)
477                 r = 0;
478         else
479                 r = -EINVAL;
480
481 err2:
482         amdgpu_ib_free(adev, &ib, NULL);
483         dma_fence_put(f);
484 err1:
485         amdgpu_device_wb_free(adev, index);
486         return r;
487 }
488
489
490 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
491 {
492         release_firmware(adev->gfx.pfp_fw);
493         adev->gfx.pfp_fw = NULL;
494         release_firmware(adev->gfx.me_fw);
495         adev->gfx.me_fw = NULL;
496         release_firmware(adev->gfx.ce_fw);
497         adev->gfx.ce_fw = NULL;
498         release_firmware(adev->gfx.rlc_fw);
499         adev->gfx.rlc_fw = NULL;
500         release_firmware(adev->gfx.mec_fw);
501         adev->gfx.mec_fw = NULL;
502         release_firmware(adev->gfx.mec2_fw);
503         adev->gfx.mec2_fw = NULL;
504
505         kfree(adev->gfx.rlc.register_list_format);
506 }
507
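/*
 * Pull the save/restore list CNTL, GPM and SRM blobs plus their versions
 * out of a v2.1 RLC firmware header.
 */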
508 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
509 {
510         const struct rlc_firmware_header_v2_1 *rlc_hdr;
511
512         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
513         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
514         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
515         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
516         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
517         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
518         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
519         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
520         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
521         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
522         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
523         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
524         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
525         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
526                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
527 }
528
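/*
 * Record whether the ME/MEC firmware on this ASIC is new enough to handle
 * a combined register write-then-wait on its own; later ring-emit paths
 * can use these flags to pick the firmware-assisted path.
 */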
529 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
530 {
531         adev->gfx.me_fw_write_wait = false;
532         adev->gfx.mec_fw_write_wait = false;
533
534         switch (adev->asic_type) {
535         case CHIP_VEGA10:
536                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
537                     (adev->gfx.me_feature_version >= 42) &&
538                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
539                     (adev->gfx.pfp_feature_version >= 42))
540                         adev->gfx.me_fw_write_wait = true;
541
542                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
543                     (adev->gfx.mec_feature_version >= 42))
544                         adev->gfx.mec_fw_write_wait = true;
545                 break;
546         case CHIP_VEGA12:
547                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
548                     (adev->gfx.me_feature_version >= 44) &&
549                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
550                     (adev->gfx.pfp_feature_version >= 44))
551                         adev->gfx.me_fw_write_wait = true;
552
553                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
554                     (adev->gfx.mec_feature_version >= 44))
555                         adev->gfx.mec_fw_write_wait = true;
556                 break;
557         case CHIP_VEGA20:
558                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
559                     (adev->gfx.me_feature_version >= 44) &&
560                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
561                     (adev->gfx.pfp_feature_version >= 44))
562                         adev->gfx.me_fw_write_wait = true;
563
564                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
565                     (adev->gfx.mec_feature_version >= 44))
566                         adev->gfx.mec_fw_write_wait = true;
567                 break;
568         case CHIP_RAVEN:
569                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
570                     (adev->gfx.me_feature_version >= 42) &&
571                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
572                     (adev->gfx.pfp_feature_version >= 42))
573                         adev->gfx.me_fw_write_wait = true;
574
575                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
576                     (adev->gfx.mec_feature_version >= 42))
577                         adev->gfx.mec_fw_write_wait = true;
578                 break;
579         default:
580                 break;
581         }
582 }
583
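/*
 * On Raven, GFXOFF needs reasonably new RLC firmware; drop the PP_GFXOFF
 * feature bit when the RLC firmware is too old (or a known bad build) to
 * support it.
 */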
584 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
585 {
586         switch (adev->asic_type) {
587         case CHIP_VEGA10:
588         case CHIP_VEGA12:
589         case CHIP_VEGA20:
590                 break;
591         case CHIP_RAVEN:
592                 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
593                         break;
594                 if ((adev->gfx.rlc_fw_version < 531) ||
595                     (adev->gfx.rlc_fw_version == 53815) ||
596                     (adev->gfx.rlc_feature_version < 1) ||
597                     !adev->gfx.rlc.is_rlc_v2_1)
598                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
599                 break;
600         default:
601                 break;
602         }
603 }
604
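/*
 * Request and validate the PFP/ME/CE/RLC/MEC/MEC2 firmware images for the
 * detected ASIC, record their ucode/feature versions, and register the
 * ucode entries needed when the PSP front-door loads the firmware.
 */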
605 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
606 {
607         const char *chip_name;
608         char fw_name[30];
609         int err;
610         struct amdgpu_firmware_info *info = NULL;
611         const struct common_firmware_header *header = NULL;
612         const struct gfx_firmware_header_v1_0 *cp_hdr;
613         const struct rlc_firmware_header_v2_0 *rlc_hdr;
614         unsigned int *tmp = NULL;
615         unsigned int i = 0;
616         uint16_t version_major;
617         uint16_t version_minor;
618
619         DRM_DEBUG("\n");
620
621         switch (adev->asic_type) {
622         case CHIP_VEGA10:
623                 chip_name = "vega10";
624                 break;
625         case CHIP_VEGA12:
626                 chip_name = "vega12";
627                 break;
628         case CHIP_VEGA20:
629                 chip_name = "vega20";
630                 break;
631         case CHIP_RAVEN:
632                 if (adev->rev_id >= 8)
633                         chip_name = "raven2";
634                 else if (adev->pdev->device == 0x15d8)
635                         chip_name = "picasso";
636                 else
637                         chip_name = "raven";
638                 break;
639         default:
640                 BUG();
641         }
642
643         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
644         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
645         if (err)
646                 goto out;
647         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
648         if (err)
649                 goto out;
650         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
651         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
652         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
653
654         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
655         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
656         if (err)
657                 goto out;
658         err = amdgpu_ucode_validate(adev->gfx.me_fw);
659         if (err)
660                 goto out;
661         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
662         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
663         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
664
665         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
666         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
667         if (err)
668                 goto out;
669         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
670         if (err)
671                 goto out;
672         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
673         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
674         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
675
676          /*
677           * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
678           * instead of picasso_rlc.bin. The board type is identified by
679           * the PCI revision:
680           * PCO AM4: revision >= 0xC8 && revision <= 0xCF
681           *          or revision >= 0xD8 && revision <= 0xDF
682           * otherwise the part is PCO FP5
683           */
684         if (!strcmp(chip_name, "picasso") &&
685                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
686                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
687                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
688         else
689                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
690         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
691         if (err)
692                 goto out;
693         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
694         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
695
696         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
697         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
698         if (version_major == 2 && version_minor == 1)
699                 adev->gfx.rlc.is_rlc_v2_1 = true;
700
701         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
702         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
703         adev->gfx.rlc.save_and_restore_offset =
704                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
705         adev->gfx.rlc.clear_state_descriptor_offset =
706                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
707         adev->gfx.rlc.avail_scratch_ram_locations =
708                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
709         adev->gfx.rlc.reg_restore_list_size =
710                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
711         adev->gfx.rlc.reg_list_format_start =
712                         le32_to_cpu(rlc_hdr->reg_list_format_start);
713         adev->gfx.rlc.reg_list_format_separate_start =
714                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
715         adev->gfx.rlc.starting_offsets_start =
716                         le32_to_cpu(rlc_hdr->starting_offsets_start);
717         adev->gfx.rlc.reg_list_format_size_bytes =
718                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
719         adev->gfx.rlc.reg_list_size_bytes =
720                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
721         adev->gfx.rlc.register_list_format =
722                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
723                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
724         if (!adev->gfx.rlc.register_list_format) {
725                 err = -ENOMEM;
726                 goto out;
727         }
728
729         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
730                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
731         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
732                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
733
734         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
735
736         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
737                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
738         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
739                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
740
741         if (adev->gfx.rlc.is_rlc_v2_1)
742                 gfx_v9_0_init_rlc_ext_microcode(adev);
743
744         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
745         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
746         if (err)
747                 goto out;
748         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
749         if (err)
750                 goto out;
751         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
752         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
753         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
754
755
756         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
757         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
758         if (!err) {
759                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
760                 if (err)
761                         goto out;
762                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
763                 adev->gfx.mec2_fw->data;
764                 adev->gfx.mec2_fw_version =
765                 le32_to_cpu(cp_hdr->header.ucode_version);
766                 adev->gfx.mec2_feature_version =
767                 le32_to_cpu(cp_hdr->ucode_feature_version);
768         } else {
769                 err = 0;
770                 adev->gfx.mec2_fw = NULL;
771         }
772
773         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
774                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
775                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
776                 info->fw = adev->gfx.pfp_fw;
777                 header = (const struct common_firmware_header *)info->fw->data;
778                 adev->firmware.fw_size +=
779                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
780
781                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
782                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
783                 info->fw = adev->gfx.me_fw;
784                 header = (const struct common_firmware_header *)info->fw->data;
785                 adev->firmware.fw_size +=
786                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
787
788                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
789                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
790                 info->fw = adev->gfx.ce_fw;
791                 header = (const struct common_firmware_header *)info->fw->data;
792                 adev->firmware.fw_size +=
793                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
794
795                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
796                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
797                 info->fw = adev->gfx.rlc_fw;
798                 header = (const struct common_firmware_header *)info->fw->data;
799                 adev->firmware.fw_size +=
800                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
801
802                 if (adev->gfx.rlc.is_rlc_v2_1 &&
803                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
804                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
805                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
806                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
807                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
808                         info->fw = adev->gfx.rlc_fw;
809                         adev->firmware.fw_size +=
810                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
811
812                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
813                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
814                         info->fw = adev->gfx.rlc_fw;
815                         adev->firmware.fw_size +=
816                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
817
818                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
819                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
820                         info->fw = adev->gfx.rlc_fw;
821                         adev->firmware.fw_size +=
822                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
823                 }
824
825                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
826                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
827                 info->fw = adev->gfx.mec_fw;
828                 header = (const struct common_firmware_header *)info->fw->data;
829                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
830                 adev->firmware.fw_size +=
831                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
832
833                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
834                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
835                 info->fw = adev->gfx.mec_fw;
836                 adev->firmware.fw_size +=
837                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
838
839                 if (adev->gfx.mec2_fw) {
840                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
841                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
842                         info->fw = adev->gfx.mec2_fw;
843                         header = (const struct common_firmware_header *)info->fw->data;
844                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
845                         adev->firmware.fw_size +=
846                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
847                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
848                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
849                         info->fw = adev->gfx.mec2_fw;
850                         adev->firmware.fw_size +=
851                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
852                 }
853
854         }
855
856 out:
857         gfx_v9_0_check_if_need_gfxoff(adev);
858         gfx_v9_0_check_fw_write_wait(adev);
859         if (err) {
860                 dev_err(adev->dev,
861                         "gfx9: Failed to load firmware \"%s\"\n",
862                         fw_name);
863                 release_firmware(adev->gfx.pfp_fw);
864                 adev->gfx.pfp_fw = NULL;
865                 release_firmware(adev->gfx.me_fw);
866                 adev->gfx.me_fw = NULL;
867                 release_firmware(adev->gfx.ce_fw);
868                 adev->gfx.ce_fw = NULL;
869                 release_firmware(adev->gfx.rlc_fw);
870                 adev->gfx.rlc_fw = NULL;
871                 release_firmware(adev->gfx.mec_fw);
872                 adev->gfx.mec_fw = NULL;
873                 release_firmware(adev->gfx.mec2_fw);
874                 adev->gfx.mec2_fw = NULL;
875         }
876         return err;
877 }
878
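/*
 * Size, in dwords, of the clear-state indirect buffer that
 * gfx_v9_0_get_csb_buffer() emits below.
 */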
879 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
880 {
881         u32 count = 0;
882         const struct cs_section_def *sect = NULL;
883         const struct cs_extent_def *ext = NULL;
884
885         /* begin clear state */
886         count += 2;
887         /* context control state */
888         count += 3;
889
890         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
891                 for (ext = sect->section; ext->extent != NULL; ++ext) {
892                         if (sect->id == SECT_CONTEXT)
893                                 count += 2 + ext->reg_count;
894                         else
895                                 return 0;
896                 }
897         }
898
899         /* end clear state */
900         count += 2;
901         /* clear state */
902         count += 2;
903
904         return count;
905 }
906
907 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
908                                     volatile u32 *buffer)
909 {
910         u32 count = 0, i;
911         const struct cs_section_def *sect = NULL;
912         const struct cs_extent_def *ext = NULL;
913
914         if (adev->gfx.rlc.cs_data == NULL)
915                 return;
916         if (buffer == NULL)
917                 return;
918
919         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
920         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
921
922         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
923         buffer[count++] = cpu_to_le32(0x80000000);
924         buffer[count++] = cpu_to_le32(0x80000000);
925
926         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
927                 for (ext = sect->section; ext->extent != NULL; ++ext) {
928                         if (sect->id == SECT_CONTEXT) {
929                                 buffer[count++] =
930                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
931                                 buffer[count++] = cpu_to_le32(ext->reg_index -
932                                                 PACKET3_SET_CONTEXT_REG_START);
933                                 for (i = 0; i < ext->reg_count; i++)
934                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
935                         } else {
936                                 return;
937                         }
938                 }
939         }
940
941         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
942         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
943
944         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
945         buffer[count++] = cpu_to_le32(0);
946 }
947
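/*
 * Walk the CU bitmap for every SE/SH and mark the first always_on_cu_num
 * CUs as always active for the RLC; the first pg_always_on_cu_num CUs are
 * additionally written to RLC_PG_ALWAYS_ON_CU_MASK. The result is cached
 * in cu_info->ao_cu_bitmap.
 */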
948 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
949 {
950         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
951         uint32_t pg_always_on_cu_num = 2;
952         uint32_t always_on_cu_num;
953         uint32_t i, j, k;
954         uint32_t mask, cu_bitmap, counter;
955
956         if (adev->flags & AMD_IS_APU)
957                 always_on_cu_num = 4;
958         else if (adev->asic_type == CHIP_VEGA12)
959                 always_on_cu_num = 8;
960         else
961                 always_on_cu_num = 12;
962
963         mutex_lock(&adev->grbm_idx_mutex);
964         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
965                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
966                         mask = 1;
967                         cu_bitmap = 0;
968                         counter = 0;
969                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
970
971                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
972                                 if (cu_info->bitmap[i][j] & mask) {
973                                         if (counter == pg_always_on_cu_num)
974                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
975                                         if (counter < always_on_cu_num)
976                                                 cu_bitmap |= mask;
977                                         else
978                                                 break;
979                                         counter++;
980                                 }
981                                 mask <<= 1;
982                         }
983
984                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
985                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
986                 }
987         }
988         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
989         mutex_unlock(&adev->grbm_idx_mutex);
990 }
991
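/*
 * Program the RLC load-balance (LB) thresholds, counters and CU masks
 * used by LBPW on Raven; gfx_v9_4_init_lbpw() below applies the Vega20
 * tuning of the same registers.
 */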
992 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
993 {
994         uint32_t data;
995
996         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
997         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
998         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
999         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1000         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1001
1002         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1003         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1004
1005         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1006         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1007
1008         mutex_lock(&adev->grbm_idx_mutex);
1009         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1010         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1011         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1012
1013         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1014         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1015         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1016         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1017         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1018
1019         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1020         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1021         data &= 0x0000FFFF;
1022         data |= 0x00C00000;
1023         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1024
1025         /*
1026          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1027          * programmed in gfx_v9_0_init_always_on_cu_mask()
1028          */
1029
1030         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1031          * but is used here for RLC_LB_CNTL configuration */
1032         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1033         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1034         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1035         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1036         mutex_unlock(&adev->grbm_idx_mutex);
1037
1038         gfx_v9_0_init_always_on_cu_mask(adev);
1039 }
1040
1041 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1042 {
1043         uint32_t data;
1044
1045         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1046         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1047         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1048         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1049         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1050
1051         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1052         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1053
1054         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1055         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1056
1057         mutex_lock(&adev->grbm_idx_mutex);
1058         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1059         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1060         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1061
1062         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1063         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1064         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1065         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1066         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1067
1068         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1069         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1070         data &= 0x0000FFFF;
1071         data |= 0x00C00000;
1072         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1073
1074         /*
1075          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1076          * programmed in gfx_v9_0_init_always_on_cu_mask()
1077          */
1078
1079         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1080          * but is used here for RLC_LB_CNTL configuration */
1081         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1082         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1083         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1084         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1085         mutex_unlock(&adev->grbm_idx_mutex);
1086
1087         gfx_v9_0_init_always_on_cu_mask(adev);
1088 }
1089
1090 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1091 {
1092         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1093 }
1094
1095 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1096 {
1097         return 5;
1098 }
1099
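/*
 * RLC software init: build the clear-state buffer from gfx9_cs_data,
 * allocate the CP jump table on Raven, and program the load-balance
 * setup on the ASICs that use it.
 */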
1100 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1101 {
1102         const struct cs_section_def *cs_data;
1103         int r;
1104
1105         adev->gfx.rlc.cs_data = gfx9_cs_data;
1106
1107         cs_data = adev->gfx.rlc.cs_data;
1108
1109         if (cs_data) {
1110                 /* init clear state block */
1111                 r = amdgpu_gfx_rlc_init_csb(adev);
1112                 if (r)
1113                         return r;
1114         }
1115
1116         if (adev->asic_type == CHIP_RAVEN) {
1117                 /* TODO: double check the cp_table_size for RV */
1118                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1119                 r = amdgpu_gfx_rlc_init_cpt(adev);
1120                 if (r)
1121                         return r;
1122         }
1123
1124         switch (adev->asic_type) {
1125         case CHIP_RAVEN:
1126                 gfx_v9_0_init_lbpw(adev);
1127                 break;
1128         case CHIP_VEGA20:
1129                 gfx_v9_4_init_lbpw(adev);
1130                 break;
1131         default:
1132                 break;
1133         }
1134
1135         return 0;
1136 }
1137
1138 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1139 {
1140         int r;
1141
1142         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1143         if (unlikely(r != 0))
1144                 return r;
1145
1146         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1147                         AMDGPU_GEM_DOMAIN_VRAM);
1148         if (!r)
1149                 adev->gfx.rlc.clear_state_gpu_addr =
1150                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1151
1152         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1153
1154         return r;
1155 }
1156
1157 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1158 {
1159         int r;
1160
1161         if (!adev->gfx.rlc.clear_state_obj)
1162                 return;
1163
1164         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1165         if (likely(r == 0)) {
1166                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1167                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1168         }
1169 }
1170
1171 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1172 {
1173         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1174         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1175 }
1176
1177 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1178 {
1179         int r;
1180         u32 *hpd;
1181         const __le32 *fw_data;
1182         unsigned fw_size;
1183         u32 *fw;
1184         size_t mec_hpd_size;
1185
1186         const struct gfx_firmware_header_v1_0 *mec_hdr;
1187
1188         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1189
1190         /* take ownership of the relevant compute queues */
1191         amdgpu_gfx_compute_queue_acquire(adev);
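             /* one GFX9_MEC_HPD_SIZE EOP buffer is needed for each compute ring acquired above */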
1192         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1193
1194         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1195                                       AMDGPU_GEM_DOMAIN_VRAM,
1196                                       &adev->gfx.mec.hpd_eop_obj,
1197                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1198                                       (void **)&hpd);
1199         if (r) {
1200                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1201                 gfx_v9_0_mec_fini(adev);
1202                 return r;
1203         }
1204
1205         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1206
1207         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1208         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1209
1210         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1211
1212         fw_data = (const __le32 *)
1213                 (adev->gfx.mec_fw->data +
1214                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1215         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1216
1217         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1218                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1219                                       &adev->gfx.mec.mec_fw_obj,
1220                                       &adev->gfx.mec.mec_fw_gpu_addr,
1221                                       (void **)&fw);
1222         if (r) {
1223                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1224                 gfx_v9_0_mec_fini(adev);
1225                 return r;
1226         }
1227
1228         memcpy(fw, fw_data, fw_size * 4); /* fw_size is in dwords */
1229
1230         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1231         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1232
1233         return 0;
1234 }
1235
1236 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1237 {
1238         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1239                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1240                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1241                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1242                 (SQ_IND_INDEX__FORCE_READ_MASK));
1243         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1244 }
1245
1246 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1247                            uint32_t wave, uint32_t thread,
1248                            uint32_t regno, uint32_t num, uint32_t *out)
1249 {
1250         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1251                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1252                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1253                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1254                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1255                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1256                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1257         while (num--)
1258                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1259 }
1260
1261 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1262 {
1263         /* type 1 wave data */
1264         dst[(*no_fields)++] = 1;
1265         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1266         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1267         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1268         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1269         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1270         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1271         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1272         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1273         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1274         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1275         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1276         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1277         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1278         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1279 }
1280
1281 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1282                                      uint32_t wave, uint32_t start,
1283                                      uint32_t size, uint32_t *dst)
1284 {
1285         wave_read_regs(
1286                 adev, simd, wave, 0,
1287                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1288 }
1289
1290 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1291                                      uint32_t wave, uint32_t thread,
1292                                      uint32_t start, uint32_t size,
1293                                      uint32_t *dst)
1294 {
1295         wave_read_regs(
1296                 adev, simd, wave, thread,
1297                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1298 }
1299
1300 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1301                                   u32 me, u32 pipe, u32 q)
1302 {
1303         soc15_grbm_select(adev, me, pipe, q, 0);
1304 }
1305
1306 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1307         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1308         .select_se_sh = &gfx_v9_0_select_se_sh,
1309         .read_wave_data = &gfx_v9_0_read_wave_data,
1310         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1311         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1312         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1313 };
1314
1315 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1316 {
1317         u32 gb_addr_config;
1318         int err;
1319
1320         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1321
1322         switch (adev->asic_type) {
1323         case CHIP_VEGA10:
1324                 adev->gfx.config.max_hw_contexts = 8;
1325                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1326                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1327                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1328                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1329                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1330                 break;
1331         case CHIP_VEGA12:
1332                 adev->gfx.config.max_hw_contexts = 8;
1333                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1334                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1335                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1336                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1337                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1338                 DRM_INFO("fix gfx.config for vega12\n");
1339                 break;
1340         case CHIP_VEGA20:
1341                 adev->gfx.config.max_hw_contexts = 8;
1342                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1343                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1344                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1345                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1346                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1347                 gb_addr_config &= ~0xf3e777ff;
1348                 gb_addr_config |= 0x22014042;
1349                 /* check vbios table if gpu info is not available */
1350                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1351                 if (err)
1352                         return err;
1353                 break;
1354         case CHIP_RAVEN:
1355                 adev->gfx.config.max_hw_contexts = 8;
1356                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1357                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1358                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1359                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1360                 if (adev->rev_id >= 8)
1361                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1362                 else
1363                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1364                 break;
1365         default:
1366                 BUG();
1367                 break;
1368         }
1369
1370         adev->gfx.config.gb_addr_config = gb_addr_config;
1371
1372         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1373                         REG_GET_FIELD(
1374                                         adev->gfx.config.gb_addr_config,
1375                                         GB_ADDR_CONFIG,
1376                                         NUM_PIPES);
1377
1378         adev->gfx.config.max_tile_pipes =
1379                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1380
1381         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1382                         REG_GET_FIELD(
1383                                         adev->gfx.config.gb_addr_config,
1384                                         GB_ADDR_CONFIG,
1385                                         NUM_BANKS);
1386         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1387                         REG_GET_FIELD(
1388                                         adev->gfx.config.gb_addr_config,
1389                                         GB_ADDR_CONFIG,
1390                                         MAX_COMPRESSED_FRAGS);
1391         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1392                         REG_GET_FIELD(
1393                                         adev->gfx.config.gb_addr_config,
1394                                         GB_ADDR_CONFIG,
1395                                         NUM_RB_PER_SE);
1396         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1397                         REG_GET_FIELD(
1398                                         adev->gfx.config.gb_addr_config,
1399                                         GB_ADDR_CONFIG,
1400                                         NUM_SHADER_ENGINES);
1401         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1402                         REG_GET_FIELD(
1403                                         adev->gfx.config.gb_addr_config,
1404                                         GB_ADDR_CONFIG,
1405                                         PIPE_INTERLEAVE_SIZE));
1406
1407         return 0;
1408 }
1409
1410 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1411                                    struct amdgpu_ngg_buf *ngg_buf,
1412                                    int size_se,
1413                                    int default_size_se)
1414 {
1415         int r;
1416
1417         if (size_se < 0) {
1418                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1419                 return -EINVAL;
1420         }
1421         size_se = size_se ? size_se : default_size_se;
1422
1423         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1424         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1425                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1426                                     &ngg_buf->bo,
1427                                     &ngg_buf->gpu_addr,
1428                                     NULL);
1429         if (r) {
1430                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1431                 return r;
1432         }
1433         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1434
1435         return r;
1436 }
1437
1438 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1439 {
1440         int i;
1441
1442         for (i = 0; i < NGG_BUF_MAX; i++)
1443                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1444                                       &adev->gfx.ngg.buf[i].gpu_addr,
1445                                       NULL);
1446
1447         memset(&adev->gfx.ngg.buf[0], 0,
1448                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1449
1450         adev->gfx.ngg.init = false;
1451
1452         return 0;
1453 }
1454
1455 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1456 {
1457         int r;
1458
1459         if (!amdgpu_ngg || adev->gfx.ngg.init)
1460                 return 0;
1461
1462         /* GDS reserve memory: 64 bytes alignment */
1463         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1464         adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
1465         adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
1466         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1467         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1468
1469         /* Primitive Buffer */
1470         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1471                                     amdgpu_prim_buf_per_se,
1472                                     64 * 1024);
1473         if (r) {
1474                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1475                 goto err;
1476         }
1477
1478         /* Position Buffer */
1479         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1480                                     amdgpu_pos_buf_per_se,
1481                                     256 * 1024);
1482         if (r) {
1483                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1484                 goto err;
1485         }
1486
1487         /* Control Sideband */
1488         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1489                                     amdgpu_cntl_sb_buf_per_se,
1490                                     256);
1491         if (r) {
1492                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1493                 goto err;
1494         }
1495
1496         /* Parameter Cache, not created by default */
1497         if (amdgpu_param_buf_per_se <= 0)
1498                 goto out;
1499
1500         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1501                                     amdgpu_param_buf_per_se,
1502                                     512 * 1024);
1503         if (r) {
1504                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1505                 goto err;
1506         }
1507
1508 out:
1509         adev->gfx.ngg.init = true;
1510         return 0;
1511 err:
1512         gfx_v9_0_ngg_fini(adev);
1513         return r;
1514 }
1515
1516 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1517 {
1518         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1519         int r;
1520         u32 data, base;
1521
1522         if (!amdgpu_ngg)
1523                 return 0;
1524
1525         /* Program buffer size */
1526         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1527                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1528         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1529                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
1530         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1531
1532         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1533                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1534         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1535                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1536         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1537
1538         /* Program buffer base address */
1539         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1540         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1541         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1542
1543         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1544         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1545         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1546
1547         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1548         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1549         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1550
1551         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1552         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1553         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1554
1555         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1556         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1557         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1558
1559         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1560         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1561         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1562
1563         /* Clear GDS reserved memory */
1564         r = amdgpu_ring_alloc(ring, 17);
1565         if (r) {
1566                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1567                           ring->name, r);
1568                 return r;
1569         }
1570
1571         gfx_v9_0_write_data_to_reg(ring, 0, false,
1572                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1573                                    (adev->gds.mem.total_size +
1574                                     adev->gfx.ngg.gds_reserve_size));
1575
1576         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1577         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1578                                 PACKET3_DMA_DATA_DST_SEL(1) |
1579                                 PACKET3_DMA_DATA_SRC_SEL(2)));
1580         amdgpu_ring_write(ring, 0);
1581         amdgpu_ring_write(ring, 0);
1582         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1583         amdgpu_ring_write(ring, 0);
1584         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1585                                 adev->gfx.ngg.gds_reserve_size);
1586
1587         gfx_v9_0_write_data_to_reg(ring, 0, false,
1588                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1589
1590         amdgpu_ring_commit(ring);
1591
1592         return 0;
1593 }
1594
1595 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1596                                       int mec, int pipe, int queue)
1597 {
1598         int r;
1599         unsigned irq_type;
1600         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1601
1604         /* mec0 is me1 */
1605         ring->me = mec + 1;
1606         ring->pipe = pipe;
1607         ring->queue = queue;
1608
1609         ring->ring_obj = NULL;
1610         ring->use_doorbell = true;
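             /* SOC15 doorbell assignments are in 64-bit units; the shift by one
              * below converts them to the dword index the ring code expects */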
1611         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1612         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1613                                 + (ring_id * GFX9_MEC_HPD_SIZE);
1614         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1615
1616         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1617                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1618                 + ring->pipe;
1619
1620         /* type-2 packets are deprecated on MEC, use type-3 instead */
1621         r = amdgpu_ring_init(adev, ring, 1024,
1622                              &adev->gfx.eop_irq, irq_type);
1623         if (r)
1624                 return r;
1625
1626
1627         return 0;
1628 }
1629
1630 static int gfx_v9_0_sw_init(void *handle)
1631 {
1632         int i, j, k, r, ring_id;
1633         struct amdgpu_ring *ring;
1634         struct amdgpu_kiq *kiq;
1635         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1636
1637         switch (adev->asic_type) {
1638         case CHIP_VEGA10:
1639         case CHIP_VEGA12:
1640         case CHIP_VEGA20:
1641         case CHIP_RAVEN:
1642                 adev->gfx.mec.num_mec = 2;
1643                 break;
1644         default:
1645                 adev->gfx.mec.num_mec = 1;
1646                 break;
1647         }
1648
1649         adev->gfx.mec.num_pipe_per_mec = 4;
1650         adev->gfx.mec.num_queue_per_pipe = 8;
1651
1652         /* EOP Event */
1653         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1654         if (r)
1655                 return r;
1656
1657         /* Privileged reg */
1658         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1659                               &adev->gfx.priv_reg_irq);
1660         if (r)
1661                 return r;
1662
1663         /* Privileged inst */
1664         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1665                               &adev->gfx.priv_inst_irq);
1666         if (r)
1667                 return r;
1668
1669         /* ECC error */
1670         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1671                               &adev->gfx.cp_ecc_error_irq);
1672         if (r)
1673                 return r;
1674
1675         /* FUE error */
1676         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1677                               &adev->gfx.cp_ecc_error_irq);
1678         if (r)
1679                 return r;
1680
1681         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1682
1683         gfx_v9_0_scratch_init(adev);
1684
1685         r = gfx_v9_0_init_microcode(adev);
1686         if (r) {
1687                 DRM_ERROR("Failed to load gfx firmware!\n");
1688                 return r;
1689         }
1690
1691         r = adev->gfx.rlc.funcs->init(adev);
1692         if (r) {
1693                 DRM_ERROR("Failed to init rlc BOs!\n");
1694                 return r;
1695         }
1696
1697         r = gfx_v9_0_mec_init(adev);
1698         if (r) {
1699                 DRM_ERROR("Failed to init MEC BOs!\n");
1700                 return r;
1701         }
1702
1703         /* set up the gfx ring */
1704         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1705                 ring = &adev->gfx.gfx_ring[i];
1706                 ring->ring_obj = NULL;
1707                 if (!i)
1708                         sprintf(ring->name, "gfx");
1709                 else
1710                         sprintf(ring->name, "gfx_%d", i);
1711                 ring->use_doorbell = true;
1712                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1713                 r = amdgpu_ring_init(adev, ring, 1024,
1714                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1715                 if (r)
1716                         return r;
1717         }
1718
1719         /* set up the compute queues - allocate horizontally across pipes */
1720         ring_id = 0;
1721         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1722                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1723                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1724                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1725                                         continue;
1726
1727                                 r = gfx_v9_0_compute_ring_init(adev,
1728                                                                ring_id,
1729                                                                i, k, j);
1730                                 if (r)
1731                                         return r;
1732
1733                                 ring_id++;
1734                         }
1735                 }
1736         }
1737
1738         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1739         if (r) {
1740                 DRM_ERROR("Failed to init KIQ BOs!\n");
1741                 return r;
1742         }
1743
1744         kiq = &adev->gfx.kiq;
1745         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1746         if (r)
1747                 return r;
1748
1749         /* create MQD for all compute queues as well as the KIQ for the SRIOV case */
1750         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1751         if (r)
1752                 return r;
1753
1754         adev->gfx.ce_ram_size = 0x8000;
1755
1756         r = gfx_v9_0_gpu_early_init(adev);
1757         if (r)
1758                 return r;
1759
1760         r = gfx_v9_0_ngg_init(adev);
1761         if (r)
1762                 return r;
1763
1764         return 0;
1765 }
1766
1767
1768 static int gfx_v9_0_sw_fini(void *handle)
1769 {
1770         int i;
1771         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1772
1773         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1774                         adev->gfx.ras_if) {
1775                 struct ras_common_if *ras_if = adev->gfx.ras_if;
1776                 struct ras_ih_if ih_info = {
1777                         .head = *ras_if,
1778                 };
1779
1780                 amdgpu_ras_debugfs_remove(adev, ras_if);
1781                 amdgpu_ras_sysfs_remove(adev, ras_if);
1782                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1783                 amdgpu_ras_feature_enable(adev, ras_if, 0);
1784                 kfree(ras_if);
1785         }
1786
1787         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1788         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1789         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1790
1791         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1792                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1793         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1794                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1795
1796         amdgpu_gfx_compute_mqd_sw_fini(adev);
1797         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1798         amdgpu_gfx_kiq_fini(adev);
1799
1800         gfx_v9_0_mec_fini(adev);
1801         gfx_v9_0_ngg_fini(adev);
1802         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1803                                 &adev->gfx.rlc.clear_state_gpu_addr,
1804                                 (void **)&adev->gfx.rlc.cs_ptr);
1805         if (adev->asic_type == CHIP_RAVEN) {
1806                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1807                                 &adev->gfx.rlc.cp_table_gpu_addr,
1808                                 (void **)&adev->gfx.rlc.cp_table_ptr);
1809         }
1810         gfx_v9_0_free_microcode(adev);
1811
1812         return 0;
1813 }
1814
1815
1816 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1817 {
1818         /* TODO */
1819 }
1820
1821 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1822 {
1823         u32 data;
1824
1825         if (instance == 0xffffffff)
1826                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1827         else
1828                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1829
1830         if (se_num == 0xffffffff)
1831                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1832         else
1833                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1834
1835         if (sh_num == 0xffffffff)
1836                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1837         else
1838                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1839
1840         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1841 }
1842
1843 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1844 {
1845         u32 data, mask;
1846
1847         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1848         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1849
1850         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1851         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1852
1853         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1854                                          adev->gfx.config.max_sh_per_se);
1855
1856         return (~data) & mask;
1857 }
1858
1859 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1860 {
1861         int i, j;
1862         u32 data;
1863         u32 active_rbs = 0;
1864         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1865                                         adev->gfx.config.max_sh_per_se;
1866
1867         mutex_lock(&adev->grbm_idx_mutex);
1868         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1869                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1870                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1871                         data = gfx_v9_0_get_rb_active_bitmap(adev);
1872                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1873                                                rb_bitmap_width_per_sh);
1874                 }
1875         }
1876         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1877         mutex_unlock(&adev->grbm_idx_mutex);
1878
1879         adev->gfx.config.backend_enable_mask = active_rbs;
1880         adev->gfx.config.num_rbs = hweight32(active_rbs);
1881 }
1882
1883 #define DEFAULT_SH_MEM_BASES    (0x6000)
1884 #define FIRST_COMPUTE_VMID      (8)
1885 #define LAST_COMPUTE_VMID       (16)
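     /* VMIDs 0-7 stay with the kernel/graphics contexts; VMIDs 8-15 are set up below for compute use */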
1886 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1887 {
1888         int i;
1889         uint32_t sh_mem_config;
1890         uint32_t sh_mem_bases;
1891
1892         /*
1893          * Configure apertures:
1894          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1895          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1896          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1897          */
1898         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1899
1900         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1901                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1902                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1903
1904         mutex_lock(&adev->srbm_mutex);
1905         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1906                 soc15_grbm_select(adev, 0, 0, 0, i);
1907                 /* CP and shaders */
1908                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1909                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1910         }
1911         soc15_grbm_select(adev, 0, 0, 0, 0);
1912         mutex_unlock(&adev->srbm_mutex);
1913 }
1914
1915 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1916 {
1917         u32 tmp;
1918         int i;
1919
1920         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1921
1922         gfx_v9_0_tiling_mode_table_init(adev);
1923
1924         gfx_v9_0_setup_rb(adev);
1925         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1926         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1927
1928         /* XXX SH_MEM regs */
1929         /* where to put LDS, scratch, GPUVM in FSA64 space */
1930         mutex_lock(&adev->srbm_mutex);
1931         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1932                 soc15_grbm_select(adev, 0, 0, 0, i);
1933                 /* CP and shaders */
1934                 if (i == 0) {
1935                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1936                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1937                         WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1938                         WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
1939                 } else {
1940                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1941                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1942                         WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1943                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1944                                 (adev->gmc.private_aperture_start >> 48));
1945                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1946                                 (adev->gmc.shared_aperture_start >> 48));
1947                         WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1948                 }
1949         }
1950         soc15_grbm_select(adev, 0, 0, 0, 0);
1951
1952         mutex_unlock(&adev->srbm_mutex);
1953
1954         gfx_v9_0_init_compute_vmid(adev);
1955
1956         mutex_lock(&adev->grbm_idx_mutex);
1957         /*
1958          * make sure that the following register writes are broadcast
1959          * to all the shaders
1960          */
1961         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1962
1963         WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1964                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
1965                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1966                    (adev->gfx.config.sc_prim_fifo_size_backend <<
1967                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1968                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
1969                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1970                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1971                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1972         mutex_unlock(&adev->grbm_idx_mutex);
1973
1974 }
1975
1976 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1977 {
1978         u32 i, j, k;
1979         u32 mask;
1980
1981         mutex_lock(&adev->grbm_idx_mutex);
1982         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1983                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1984                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1985                         for (k = 0; k < adev->usec_timeout; k++) {
1986                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1987                                         break;
1988                                 udelay(1);
1989                         }
1990                         if (k == adev->usec_timeout) {
1991                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1992                                                       0xffffffff, 0xffffffff);
1993                                 mutex_unlock(&adev->grbm_idx_mutex);
1994                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1995                                          i, j);
1996                                 return;
1997                         }
1998                 }
1999         }
2000         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2001         mutex_unlock(&adev->grbm_idx_mutex);
2002
2003         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2004                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2005                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2006                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2007         for (k = 0; k < adev->usec_timeout; k++) {
2008                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2009                         break;
2010                 udelay(1);
2011         }
2012 }
2013
2014 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2015                                                bool enable)
2016 {
2017         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2018
2019         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2020         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2021         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2022         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2023
2024         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2025 }
2026
2027 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2028 {
2029         /* program the clear state indirect buffer (CSIB) address and size */
2030         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2031                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2032         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2033                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2034         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2035                         adev->gfx.rlc.clear_state_size);
2036 }
2037
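     /*
      * Walk the indirect part of the RLC register list format: record the offset
      * at which each indirect block starts and collect the set of unique indirect
      * registers referenced by the list.
      */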
2038 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2039                                 int indirect_offset,
2040                                 int list_size,
2041                                 int *unique_indirect_regs,
2042                                 int unique_indirect_reg_count,
2043                                 int *indirect_start_offsets,
2044                                 int *indirect_start_offsets_count,
2045                                 int max_start_offsets_count)
2046 {
2047         int idx;
2048
2049         for (; indirect_offset < list_size; indirect_offset++) {
2050                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2051                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2052                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2053
2054                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2055                         indirect_offset += 2;
2056
2057                         /* look for the matching index */
2058                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2059                                 if (unique_indirect_regs[idx] ==
2060                                         register_list_format[indirect_offset] ||
2061                                         !unique_indirect_regs[idx])
2062                                         break;
2063                         }
2064
2065                         BUG_ON(idx >= unique_indirect_reg_count);
2066
2067                         if (!unique_indirect_regs[idx])
2068                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2069
2070                         indirect_offset++;
2071                 }
2072         }
2073 }
2074
2075 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2076 {
2077         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2078         int unique_indirect_reg_count = 0;
2079
2080         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2081         int indirect_start_offsets_count = 0;
2082
2083         int list_size = 0;
2084         int i = 0, j = 0;
2085         u32 tmp = 0;
2086
2087         u32 *register_list_format =
2088                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2089         if (!register_list_format)
2090                 return -ENOMEM;
2091         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2092                 adev->gfx.rlc.reg_list_format_size_bytes);
2093
2094         /* setup unique_indirect_regs array and indirect_start_offsets array */
2095         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2096         gfx_v9_1_parse_ind_reg_list(register_list_format,
2097                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2098                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2099                                     unique_indirect_regs,
2100                                     unique_indirect_reg_count,
2101                                     indirect_start_offsets,
2102                                     &indirect_start_offsets_count,
2103                                     ARRAY_SIZE(indirect_start_offsets));
2104
2105         /* enable auto inc in case it is disabled */
2106         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2107         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2108         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2109
2110         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2111         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2112                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2113         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2114                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2115                         adev->gfx.rlc.register_restore[i]);
2116
2117         /* load indirect register */
2118         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2119                 adev->gfx.rlc.reg_list_format_start);
2120
2121         /* direct register portion */
2122         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2123                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2124                         register_list_format[i]);
2125
2126         /* indirect register portion */
2127         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2128                 if (register_list_format[i] == 0xFFFFFFFF) {
2129                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2130                         continue;
2131                 }
2132
2133                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2134                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135
2136                 for (j = 0; j < unique_indirect_reg_count; j++) {
2137                         if (register_list_format[i] == unique_indirect_regs[j]) {
2138                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2139                                 break;
2140                         }
2141                 }
2142
2143                 BUG_ON(j >= unique_indirect_reg_count);
2144
2145                 i++;
2146         }
2147
2148         /* set save/restore list size */
2149         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2150         list_size = list_size >> 1;
2151         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2152                 adev->gfx.rlc.reg_restore_list_size);
2153         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2154
2155         /* write the starting offsets to RLC scratch ram */
2156         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2157                 adev->gfx.rlc.starting_offsets_start);
2158         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2159                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2160                        indirect_start_offsets[i]);
2161
2162         /* load unique indirect regs */
2163         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2164                 if (unique_indirect_regs[i] != 0) {
2165                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2166                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2167                                unique_indirect_regs[i] & 0x3FFFF);
2168
2169                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2170                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2171                                unique_indirect_regs[i] >> 20);
2172                 }
2173         }
2174
2175         kfree(register_list_format);
2176         return 0;
2177 }
2178
2179 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2180 {
2181         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2182 }
2183
2184 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2185                                              bool enable)
2186 {
2187         uint32_t data = 0;
2188         uint32_t default_data = 0;
2189
2190         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2191         if (enable) {
2192                 /* enable GFXIP control over CGPG */
2193                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2194                 if (default_data != data)
2195                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2196
2197                 /* update status */
2198                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2199                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2200                 if (default_data != data)
2201                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2202         } else {
2203                 /* restore GFXIP control over CGPG */
2204                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2205                 if (default_data != data)
2206                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2207         }
2208 }
2209
2210 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2211 {
2212         uint32_t data = 0;
2213
2214         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2215                               AMD_PG_SUPPORT_GFX_SMG |
2216                               AMD_PG_SUPPORT_GFX_DMG)) {
2217                 /* init IDLE_POLL_COUNT = 0x60 */
2218                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2219                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2220                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2221                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2222
2223                 /* init RLC PG Delay */
2224                 data = 0;
2225                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2226                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2227                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2228                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2229                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2230
2231                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2232                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2233                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2234                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2235
2236                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2237                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2238                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2239                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2240
2241                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2242                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2243
2244                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2245                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2246                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2247
2248                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2249         }
2250 }
2251
2252 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2253                                                 bool enable)
2254 {
2255         uint32_t data = 0;
2256         uint32_t default_data = 0;
2257
2258         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2259         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2260                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2261                              enable ? 1 : 0);
2262         if (default_data != data)
2263                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2264 }
2265
2266 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2267                                                 bool enable)
2268 {
2269         uint32_t data = 0;
2270         uint32_t default_data = 0;
2271
2272         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2273         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2274                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2275                              enable ? 1 : 0);
2276         if (default_data != data)
2277                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2278 }
2279
2280 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2281                                         bool enable)
2282 {
2283         uint32_t data = 0;
2284         uint32_t default_data = 0;
2285
2286         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2287         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2288                              CP_PG_DISABLE,
2289                              enable ? 0 : 1);
2290         if (default_data != data)
2291                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2292 }
2293
2294 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2295                                                 bool enable)
2296 {
2297         uint32_t data, default_data;
2298
2299         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2300         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2301                              GFX_POWER_GATING_ENABLE,
2302                              enable ? 1 : 0);
2303         if (default_data != data)
2304                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2305 }
2306
2307 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2308                                                 bool enable)
2309 {
2310         uint32_t data, default_data;
2311
2312         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2313         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2314                              GFX_PIPELINE_PG_ENABLE,
2315                              enable ? 1 : 0);
2316         if (default_data != data)
2317                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2318
2319         if (!enable)
2320                 /* read any GFX register to wake up GFX */
2321                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2322 }
2323
2324 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2325                                                        bool enable)
2326 {
2327         uint32_t data, default_data;
2328
2329         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2330         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2331                              STATIC_PER_CU_PG_ENABLE,
2332                              enable ? 1 : 0);
2333         if (default_data != data)
2334                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2335 }
2336
2337 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2338                                                 bool enable)
2339 {
2340         uint32_t data, default_data;
2341
2342         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2343         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2344                              DYN_PER_CU_PG_ENABLE,
2345                              enable ? 1 : 0);
2346         if (default_data != data)
2347                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2348 }
2349
2350 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2351 {
2352         gfx_v9_0_init_csb(adev);
2353
2354         /*
2355          * The RLC save/restore list is only usable from RLC firmware v2.1
2356          * onwards, and it is required by the gfxoff feature.
2357          */
2358         if (adev->gfx.rlc.is_rlc_v2_1) {
2359                 gfx_v9_1_init_rlc_save_restore_list(adev);
2360                 gfx_v9_0_enable_save_restore_machine(adev);
2361         }
2362
2363         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2364                               AMD_PG_SUPPORT_GFX_SMG |
2365                               AMD_PG_SUPPORT_GFX_DMG |
2366                               AMD_PG_SUPPORT_CP |
2367                               AMD_PG_SUPPORT_GDS |
2368                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2369                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2370                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2371                 gfx_v9_0_init_gfx_power_gating(adev);
2372         }
2373 }
2374
2375 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2376 {
2377         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2378         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2379         gfx_v9_0_wait_for_rlc_serdes(adev);
2380 }
2381
2382 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2383 {
2384         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2385         udelay(50);
2386         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2387         udelay(50);
2388 }
2389
2390 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2391 {
2392 #ifdef AMDGPU_RLC_DEBUG_RETRY
2393         u32 rlc_ucode_ver;
2394 #endif
2395
2396         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2397         udelay(50);
2398
2399         /* APUs enable the CP interrupt only after the CP has been initialized */
2400         if (!(adev->flags & AMD_IS_APU)) {
2401                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2402                 udelay(50);
2403         }
2404
2405 #ifdef AMDGPU_RLC_DEBUG_RETRY
2406         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2407         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2408         if (rlc_ucode_ver == 0x108) {
2409                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2410                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2411                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2412                  * default is 0x9C4 to create a 100us interval */
2413                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2414                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2415                  * to disable the page fault retry interrupts, default is
2416                  * 0x100 (256) */
2417                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2418         }
2419 #endif
2420 }
2421
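/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode image word by word
 * through RLC_GPM_UCODE_ADDR/DATA, then write the firmware version back to
 * the address register to mark completion.
 */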
2422 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2423 {
2424         const struct rlc_firmware_header_v2_0 *hdr;
2425         const __le32 *fw_data;
2426         unsigned i, fw_size;
2427
2428         if (!adev->gfx.rlc_fw)
2429                 return -EINVAL;
2430
2431         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2432         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2433
2434         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2435                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2436         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2437
2438         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2439                         RLCG_UCODE_LOADING_START_ADDRESS);
2440         for (i = 0; i < fw_size; i++)
2441                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2442         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2443
2444         return 0;
2445 }
2446
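/*
 * Bring the RLC back up: stop it, disable coarse-grain clock gating, redo the
 * power-gating init, reload the RLC microcode when PSP loading is not used,
 * configure LBPW per ASIC, then restart the RLC. SRIOV guests only
 * reinitialize the CSB.
 */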
2447 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2448 {
2449         int r;
2450
2451         if (amdgpu_sriov_vf(adev)) {
2452                 gfx_v9_0_init_csb(adev);
2453                 return 0;
2454         }
2455
2456         adev->gfx.rlc.funcs->stop(adev);
2457
2458         /* disable CG */
2459         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2460
2461         gfx_v9_0_init_pg(adev);
2462
2463         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2464                 /* legacy rlc firmware loading */
2465                 r = gfx_v9_0_rlc_load_microcode(adev);
2466                 if (r)
2467                         return r;
2468         }
2469
2470         switch (adev->asic_type) {
2471         case CHIP_RAVEN:
2472                 if (amdgpu_lbpw == 0)
2473                         gfx_v9_0_enable_lbpw(adev, false);
2474                 else
2475                         gfx_v9_0_enable_lbpw(adev, true);
2476                 break;
2477         case CHIP_VEGA20:
2478                 if (amdgpu_lbpw > 0)
2479                         gfx_v9_0_enable_lbpw(adev, true);
2480                 else
2481                         gfx_v9_0_enable_lbpw(adev, false);
2482                 break;
2483         default:
2484                 break;
2485         }
2486
2487         adev->gfx.rlc.funcs->start(adev);
2488
2489         return 0;
2490 }
2491
2492 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2493 {
2494         int i;
2495         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2496
2497         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2498         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2499         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2500         if (!enable) {
2501                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2502                         adev->gfx.gfx_ring[i].sched.ready = false;
2503         }
2504         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2505         udelay(50);
2506 }
2507
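/*
 * Legacy (non-PSP) load of the gfx CP microcode: halt the CP, then stream the
 * PFP, CE and ME images into their respective ucode/RAM data registers and
 * record each firmware version.
 */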
2508 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2509 {
2510         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2511         const struct gfx_firmware_header_v1_0 *ce_hdr;
2512         const struct gfx_firmware_header_v1_0 *me_hdr;
2513         const __le32 *fw_data;
2514         unsigned i, fw_size;
2515
2516         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2517                 return -EINVAL;
2518
2519         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2520                 adev->gfx.pfp_fw->data;
2521         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2522                 adev->gfx.ce_fw->data;
2523         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2524                 adev->gfx.me_fw->data;
2525
2526         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2527         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2528         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2529
2530         gfx_v9_0_cp_gfx_enable(adev, false);
2531
2532         /* PFP */
2533         fw_data = (const __le32 *)
2534                 (adev->gfx.pfp_fw->data +
2535                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2536         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2537         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2538         for (i = 0; i < fw_size; i++)
2539                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2540         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2541
2542         /* CE */
2543         fw_data = (const __le32 *)
2544                 (adev->gfx.ce_fw->data +
2545                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2546         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2547         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2548         for (i = 0; i < fw_size; i++)
2549                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2550         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2551
2552         /* ME */
2553         fw_data = (const __le32 *)
2554                 (adev->gfx.me_fw->data +
2555                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2556         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2557         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2558         for (i = 0; i < fw_size; i++)
2559                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2560         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2561
2562         return 0;
2563 }
2564
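/*
 * Start the gfx CP: un-halt the ME/PFP/CE and emit the initial clear-state
 * sequence (context control, the gfx9 clear-state sections, CE partition
 * bases and the VGT index type) on the first gfx ring.
 */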
2565 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2566 {
2567         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2568         const struct cs_section_def *sect = NULL;
2569         const struct cs_extent_def *ext = NULL;
2570         int r, i, tmp;
2571
2572         /* init the CP */
2573         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2574         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2575
2576         gfx_v9_0_cp_gfx_enable(adev, true);
2577
2578         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2579         if (r) {
2580                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2581                 return r;
2582         }
2583
2584         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2585         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2586
2587         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2588         amdgpu_ring_write(ring, 0x80000000);
2589         amdgpu_ring_write(ring, 0x80000000);
2590
2591         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2592                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2593                         if (sect->id == SECT_CONTEXT) {
2594                                 amdgpu_ring_write(ring,
2595                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2596                                                ext->reg_count));
2597                                 amdgpu_ring_write(ring,
2598                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2599                                 for (i = 0; i < ext->reg_count; i++)
2600                                         amdgpu_ring_write(ring, ext->extent[i]);
2601                         }
2602                 }
2603         }
2604
2605         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2606         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2607
2608         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2609         amdgpu_ring_write(ring, 0);
2610
2611         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2612         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2613         amdgpu_ring_write(ring, 0x8000);
2614         amdgpu_ring_write(ring, 0x8000);
2615
2616         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2617         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2618                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2619         amdgpu_ring_write(ring, tmp);
2620         amdgpu_ring_write(ring, 0);
2621
2622         amdgpu_ring_commit(ring);
2623
2624         return 0;
2625 }
2626
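/*
 * Program the gfx ring buffer registers (size, rptr/wptr write-back
 * addresses, base address and doorbell range) and kick off the ring via
 * gfx_v9_0_cp_gfx_start().
 */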
2627 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2628 {
2629         struct amdgpu_ring *ring;
2630         u32 tmp;
2631         u32 rb_bufsz;
2632         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2633
2634         /* Set the write pointer delay */
2635         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2636
2637         /* set the RB to use vmid 0 */
2638         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2639
2640         /* Set ring buffer size */
2641         ring = &adev->gfx.gfx_ring[0];
2642         rb_bufsz = order_base_2(ring->ring_size / 8);
2643         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2644         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2645 #ifdef __BIG_ENDIAN
2646         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2647 #endif
2648         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2649
2650         /* Initialize the ring buffer's write pointers */
2651         ring->wptr = 0;
2652         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2653         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2654
2655         /* set the wb address whether it's enabled or not */
2656         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2657         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2658         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2659
2660         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2661         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2662         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2663
2664         mdelay(1);
2665         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2666
2667         rb_addr = ring->gpu_addr >> 8;
2668         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2669         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2670
2671         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2672         if (ring->use_doorbell) {
2673                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2674                                     DOORBELL_OFFSET, ring->doorbell_index);
2675                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2676                                     DOORBELL_EN, 1);
2677         } else {
2678                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2679         }
2680         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2681
2682         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2683                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
2684         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2685
2686         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2687                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2688
2689
2690         /* start the ring */
2691         gfx_v9_0_cp_gfx_start(adev);
2692         ring->sched.ready = true;
2693
2694         return 0;
2695 }
2696
2697 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2698 {
2699         int i;
2700
2701         if (enable) {
2702                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2703         } else {
2704                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2705                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2706                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2707                         adev->gfx.compute_ring[i].sched.ready = false;
2708                 adev->gfx.kiq.ring.sched.ready = false;
2709         }
2710         udelay(50);
2711 }
2712
2713 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2714 {
2715         const struct gfx_firmware_header_v1_0 *mec_hdr;
2716         const __le32 *fw_data;
2717         unsigned i;
2718         u32 tmp;
2719
2720         if (!adev->gfx.mec_fw)
2721                 return -EINVAL;
2722
2723         gfx_v9_0_cp_compute_enable(adev, false);
2724
2725         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2726         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2727
2728         fw_data = (const __le32 *)
2729                 (adev->gfx.mec_fw->data +
2730                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2731         tmp = 0;
2732         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2733         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2734         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2735
2736         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2737                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2738         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2739                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2740
2741         /* MEC1 */
2742         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2743                          mec_hdr->jt_offset);
2744         for (i = 0; i < mec_hdr->jt_size; i++)
2745                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2746                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2747
2748         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2749                         adev->gfx.mec_fw_version);
2750         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1 */
2751
2752         return 0;
2753 }
2754
2755 /* KIQ functions */
2756 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2757 {
2758         uint32_t tmp;
2759         struct amdgpu_device *adev = ring->adev;
2760
2761         /* tell the RLC which queue is the KIQ */
2762         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2763         tmp &= 0xffffff00;
2764         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2765         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2766         tmp |= 0x80;
2767         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2768 }
2769
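/*
 * Use the KIQ to map all kernel compute queues (KCQs): build the queue mask
 * from the MEC queue bitmap, emit a SET_RESOURCES packet followed by one
 * MAP_QUEUES packet per compute ring, and ring-test the KIQ afterwards.
 */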
2770 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2771 {
2772         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2773         uint64_t queue_mask = 0;
2774         int r, i;
2775
2776         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2777                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2778                         continue;
2779
2780                 /* This situation may be hit in the future if a new HW
2781                  * generation exposes more than 64 queues. If so, the
2782                  * definition of queue_mask needs updating */
2783                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2784                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2785                         break;
2786                 }
2787
2788                 queue_mask |= (1ull << i);
2789         }
2790
2791         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2792         if (r) {
2793                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2794                 return r;
2795         }
2796
2797         /* set resources */
2798         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2799         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2800                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2801         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2802         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2803         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2804         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2805         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2806         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2807         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2808                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2809                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2810                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2811
2812                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2813                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2814                 amdgpu_ring_write(kiq_ring,
2815                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2816                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2817                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2818                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2819                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2820                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2821                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2822                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2823                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2824                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2825                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2826                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2827                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2828                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2829         }
2830
2831         r = amdgpu_ring_test_helper(kiq_ring);
2832         if (r)
2833                 DRM_ERROR("KCQ enable failed\n");
2834
2835         return r;
2836 }
2837
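/*
 * Fill in the memory queue descriptor (MQD) for a compute ring: EOP buffer,
 * doorbell control, MQD/HQD base addresses, queue size and write-back
 * addresses, mirroring what the CP_RB0_* programming does for the gfx ring.
 */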
2838 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2839 {
2840         struct amdgpu_device *adev = ring->adev;
2841         struct v9_mqd *mqd = ring->mqd_ptr;
2842         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2843         uint32_t tmp;
2844
2845         mqd->header = 0xC0310800;
2846         mqd->compute_pipelinestat_enable = 0x00000001;
2847         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2848         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2849         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2850         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2851         mqd->compute_misc_reserved = 0x00000003;
2852
2853         mqd->dynamic_cu_mask_addr_lo =
2854                 lower_32_bits(ring->mqd_gpu_addr
2855                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2856         mqd->dynamic_cu_mask_addr_hi =
2857                 upper_32_bits(ring->mqd_gpu_addr
2858                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2859
2860         eop_base_addr = ring->eop_gpu_addr >> 8;
2861         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2862         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2863
2864         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2865         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2866         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2867                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2868
2869         mqd->cp_hqd_eop_control = tmp;
2870
2871         /* enable doorbell? */
2872         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2873
2874         if (ring->use_doorbell) {
2875                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2876                                     DOORBELL_OFFSET, ring->doorbell_index);
2877                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2878                                     DOORBELL_EN, 1);
2879                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2880                                     DOORBELL_SOURCE, 0);
2881                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2882                                     DOORBELL_HIT, 0);
2883         } else {
2884                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885                                          DOORBELL_EN, 0);
2886         }
2887
2888         mqd->cp_hqd_pq_doorbell_control = tmp;
2889
2890         /* disable the queue if it's active */
2891         ring->wptr = 0;
2892         mqd->cp_hqd_dequeue_request = 0;
2893         mqd->cp_hqd_pq_rptr = 0;
2894         mqd->cp_hqd_pq_wptr_lo = 0;
2895         mqd->cp_hqd_pq_wptr_hi = 0;
2896
2897         /* set the pointer to the MQD */
2898         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2899         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2900
2901         /* set MQD vmid to 0 */
2902         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2903         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2904         mqd->cp_mqd_control = tmp;
2905
2906         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2907         hqd_gpu_addr = ring->gpu_addr >> 8;
2908         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2909         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2910
2911         /* set up the HQD, this is similar to CP_RB0_CNTL */
2912         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2913         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2914                             (order_base_2(ring->ring_size / 4) - 1));
2915         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2916                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2917 #ifdef __BIG_ENDIAN
2918         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2919 #endif
2920         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2921         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2922         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2923         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2924         mqd->cp_hqd_pq_control = tmp;
2925
2926         /* set the wb address whether it's enabled or not */
2927         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2928         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2929         mqd->cp_hqd_pq_rptr_report_addr_hi =
2930                 upper_32_bits(wb_gpu_addr) & 0xffff;
2931
2932         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2933         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2934         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2935         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2936
2937         tmp = 0;
2938         /* enable the doorbell if requested */
2939         if (ring->use_doorbell) {
2940                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2941                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2942                                 DOORBELL_OFFSET, ring->doorbell_index);
2943
2944                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2945                                          DOORBELL_EN, 1);
2946                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947                                          DOORBELL_SOURCE, 0);
2948                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949                                          DOORBELL_HIT, 0);
2950         }
2951
2952         mqd->cp_hqd_pq_doorbell_control = tmp;
2953
2954         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2955         ring->wptr = 0;
2956         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2957
2958         /* set the vmid for the queue */
2959         mqd->cp_hqd_vmid = 0;
2960
2961         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2962         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2963         mqd->cp_hqd_persistent_state = tmp;
2964
2965         /* set MIN_IB_AVAIL_SIZE */
2966         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2967         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2968         mqd->cp_hqd_ib_control = tmp;
2969
2970         /* activate the queue */
2971         mqd->cp_hqd_active = 1;
2972
2973         return 0;
2974 }
2975
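/*
 * Program the KIQ hardware queue directly from its MQD: disable wptr polling,
 * drain any active queue, then write the HQD registers and activate the
 * queue.
 */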
2976 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2977 {
2978         struct amdgpu_device *adev = ring->adev;
2979         struct v9_mqd *mqd = ring->mqd_ptr;
2980         int j;
2981
2982         /* disable wptr polling */
2983         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2984
2985         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2986                mqd->cp_hqd_eop_base_addr_lo);
2987         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2988                mqd->cp_hqd_eop_base_addr_hi);
2989
2990         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2991         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2992                mqd->cp_hqd_eop_control);
2993
2994         /* enable doorbell? */
2995         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2996                mqd->cp_hqd_pq_doorbell_control);
2997
2998         /* disable the queue if it's active */
2999         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3000                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3001                 for (j = 0; j < adev->usec_timeout; j++) {
3002                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3003                                 break;
3004                         udelay(1);
3005                 }
3006                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3007                        mqd->cp_hqd_dequeue_request);
3008                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3009                        mqd->cp_hqd_pq_rptr);
3010                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3011                        mqd->cp_hqd_pq_wptr_lo);
3012                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3013                        mqd->cp_hqd_pq_wptr_hi);
3014         }
3015
3016         /* set the pointer to the MQD */
3017         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3018                mqd->cp_mqd_base_addr_lo);
3019         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3020                mqd->cp_mqd_base_addr_hi);
3021
3022         /* set MQD vmid to 0 */
3023         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3024                mqd->cp_mqd_control);
3025
3026         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3027         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3028                mqd->cp_hqd_pq_base_lo);
3029         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3030                mqd->cp_hqd_pq_base_hi);
3031
3032         /* set up the HQD, this is similar to CP_RB0_CNTL */
3033         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3034                mqd->cp_hqd_pq_control);
3035
3036         /* set the wb address whether it's enabled or not */
3037         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3038                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3039         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3040                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3041
3042         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3043         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3044                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3045         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3046                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3047
3048         /* enable the doorbell if requested */
3049         if (ring->use_doorbell) {
3050                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3051                                         (adev->doorbell_index.kiq * 2) << 2);
3052                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3053                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3054         }
3055
3056         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3057                mqd->cp_hqd_pq_doorbell_control);
3058
3059         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3060         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3061                mqd->cp_hqd_pq_wptr_lo);
3062         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3063                mqd->cp_hqd_pq_wptr_hi);
3064
3065         /* set the vmid for the queue */
3066         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3067
3068         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3069                mqd->cp_hqd_persistent_state);
3070
3071         /* activate the queue */
3072         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3073                mqd->cp_hqd_active);
3074
3075         if (ring->use_doorbell)
3076                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3077
3078         return 0;
3079 }
3080
3081 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3082 {
3083         struct amdgpu_device *adev = ring->adev;
3084         int j;
3085
3086         /* disable the queue if it's active */
3087         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3088
3089                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3090
3091                 for (j = 0; j < adev->usec_timeout; j++) {
3092                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3093                                 break;
3094                         udelay(1);
3095                 }
3096
3097                 if (j == adev->usec_timeout) {
3098                         DRM_DEBUG("KIQ dequeue request failed.\n");
3099
3100                         /* Manual disable if dequeue request times out */
3101                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3102                 }
3103
3104                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3105                       0);
3106         }
3107
3108         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3109         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3110         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3111         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3112         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3113         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3114         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3115         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3116
3117         return 0;
3118 }
3119
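/*
 * Initialize the KIQ queue, or restore its MQD from the backup copy after a
 * GPU reset, and program the queue registers under srbm_mutex with the KIQ's
 * me/pipe/queue selected.
 */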
3120 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3121 {
3122         struct amdgpu_device *adev = ring->adev;
3123         struct v9_mqd *mqd = ring->mqd_ptr;
3124         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3125
3126         gfx_v9_0_kiq_setting(ring);
3127
3128         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3129                 /* reset MQD to a clean status */
3130                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3131                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3132
3133                 /* reset ring buffer */
3134                 ring->wptr = 0;
3135                 amdgpu_ring_clear_ring(ring);
3136
3137                 mutex_lock(&adev->srbm_mutex);
3138                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3139                 gfx_v9_0_kiq_init_register(ring);
3140                 soc15_grbm_select(adev, 0, 0, 0, 0);
3141                 mutex_unlock(&adev->srbm_mutex);
3142         } else {
3143                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3144                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3145                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3146                 mutex_lock(&adev->srbm_mutex);
3147                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3148                 gfx_v9_0_mqd_init(ring);
3149                 gfx_v9_0_kiq_init_register(ring);
3150                 soc15_grbm_select(adev, 0, 0, 0, 0);
3151                 mutex_unlock(&adev->srbm_mutex);
3152
3153                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3154                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3155         }
3156
3157         return 0;
3158 }
3159
3160 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3161 {
3162         struct amdgpu_device *adev = ring->adev;
3163         struct v9_mqd *mqd = ring->mqd_ptr;
3164         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3165
3166         if (!adev->in_gpu_reset && !adev->in_suspend) {
3167                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3168                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3169                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3170                 mutex_lock(&adev->srbm_mutex);
3171                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3172                 gfx_v9_0_mqd_init(ring);
3173                 soc15_grbm_select(adev, 0, 0, 0, 0);
3174                 mutex_unlock(&adev->srbm_mutex);
3175
3176                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3177                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3178         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3179                 /* reset MQD to a clean status */
3180                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3181                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3182
3183                 /* reset ring buffer */
3184                 ring->wptr = 0;
3185                 amdgpu_ring_clear_ring(ring);
3186         } else {
3187                 amdgpu_ring_clear_ring(ring);
3188         }
3189
3190         return 0;
3191 }
3192
3193 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3194 {
3195         struct amdgpu_ring *ring;
3196         int r;
3197
3198         ring = &adev->gfx.kiq.ring;
3199
3200         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3201         if (unlikely(r != 0))
3202                 return r;
3203
3204         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3205         if (unlikely(r != 0))
3206                 return r;
3207
3208         gfx_v9_0_kiq_init_queue(ring);
3209         amdgpu_bo_kunmap(ring->mqd_obj);
3210         ring->mqd_ptr = NULL;
3211         amdgpu_bo_unreserve(ring->mqd_obj);
3212         ring->sched.ready = true;
3213         return 0;
3214 }
3215
3216 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3217 {
3218         struct amdgpu_ring *ring = NULL;
3219         int r = 0, i;
3220
3221         gfx_v9_0_cp_compute_enable(adev, true);
3222
3223         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3224                 ring = &adev->gfx.compute_ring[i];
3225
3226                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3227                 if (unlikely(r != 0))
3228                         goto done;
3229                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3230                 if (!r) {
3231                         r = gfx_v9_0_kcq_init_queue(ring);
3232                         amdgpu_bo_kunmap(ring->mqd_obj);
3233                         ring->mqd_ptr = NULL;
3234                 }
3235                 amdgpu_bo_unreserve(ring->mqd_obj);
3236                 if (r)
3237                         goto done;
3238         }
3239
3240         r = gfx_v9_0_kiq_kcq_enable(adev);
3241 done:
3242         return r;
3243 }
3244
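/*
 * Full CP bring-up: load the gfx and compute microcode on the non-PSP path,
 * resume the KIQ, the gfx ring and the KCQs, then ring-test everything before
 * re-enabling the GUI idle interrupt.
 */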
3245 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3246 {
3247         int r, i;
3248         struct amdgpu_ring *ring;
3249
3250         if (!(adev->flags & AMD_IS_APU))
3251                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3252
3253         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3254                 /* legacy firmware loading */
3255                 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3256                 if (r)
3257                         return r;
3258
3259                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3260                 if (r)
3261                         return r;
3262         }
3263
3264         r = gfx_v9_0_kiq_resume(adev);
3265         if (r)
3266                 return r;
3267
3268         r = gfx_v9_0_cp_gfx_resume(adev);
3269         if (r)
3270                 return r;
3271
3272         r = gfx_v9_0_kcq_resume(adev);
3273         if (r)
3274                 return r;
3275
3276         ring = &adev->gfx.gfx_ring[0];
3277         r = amdgpu_ring_test_helper(ring);
3278         if (r)
3279                 return r;
3280
3281         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3282                 ring = &adev->gfx.compute_ring[i];
3283                 amdgpu_ring_test_helper(ring);
3284         }
3285
3286         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3287
3288         return 0;
3289 }
3290
3291 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3292 {
3293         gfx_v9_0_cp_gfx_enable(adev, enable);
3294         gfx_v9_0_cp_compute_enable(adev, enable);
3295 }
3296
3297 static int gfx_v9_0_hw_init(void *handle)
3298 {
3299         int r;
3300         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3301
3302         gfx_v9_0_init_golden_registers(adev);
3303
3304         gfx_v9_0_constants_init(adev);
3305
3306         r = gfx_v9_0_csb_vram_pin(adev);
3307         if (r)
3308                 return r;
3309
3310         r = adev->gfx.rlc.funcs->resume(adev);
3311         if (r)
3312                 return r;
3313
3314         r = gfx_v9_0_cp_resume(adev);
3315         if (r)
3316                 return r;
3317
3318         r = gfx_v9_0_ngg_en(adev);
3319         if (r)
3320                 return r;
3321
3322         return r;
3323 }
3324
3325 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3326 {
3327         int r, i;
3328         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3329
3330         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3331         if (r)
3332                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3333
3334         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3335                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3336
3337                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3338                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3339                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3340                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3341                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3342                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3343                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3344                 amdgpu_ring_write(kiq_ring, 0);
3345                 amdgpu_ring_write(kiq_ring, 0);
3346                 amdgpu_ring_write(kiq_ring, 0);
3347         }
3348         r = amdgpu_ring_test_helper(kiq_ring);
3349         if (r)
3350                 DRM_ERROR("KCQ disable failed\n");
3351
3352         return r;
3353 }
3354
3355 static int gfx_v9_0_hw_fini(void *handle)
3356 {
3357         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3358
3359         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3360         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3361         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3362
3363         /* disable the KCQs so the CPC does not touch memory that is no longer valid */
3364         gfx_v9_0_kcq_disable(adev);
3365
3366         if (amdgpu_sriov_vf(adev)) {
3367                 gfx_v9_0_cp_gfx_enable(adev, false);
3368                 /* must disable wptr polling for SRIOV when hw is finished;
3369                  * otherwise the CPC engine may keep fetching a WB address
3370                  * that is no longer valid after sw teardown and trigger a
3371                  * DMAR read error on the hypervisor side.
3372                  */
3373                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3374                 return 0;
3375         }
3376
3377         /* Use deinitialize sequence from CAIL when unbinding device from driver,
3378          * otherwise the KIQ hangs when the device is bound back
3379          */
3380         if (!adev->in_gpu_reset && !adev->in_suspend) {
3381                 mutex_lock(&adev->srbm_mutex);
3382                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3383                                 adev->gfx.kiq.ring.pipe,
3384                                 adev->gfx.kiq.ring.queue, 0);
3385                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3386                 soc15_grbm_select(adev, 0, 0, 0, 0);
3387                 mutex_unlock(&adev->srbm_mutex);
3388         }
3389
3390         gfx_v9_0_cp_enable(adev, false);
3391         adev->gfx.rlc.funcs->stop(adev);
3392
3393         gfx_v9_0_csb_vram_unpin(adev);
3394
3395         return 0;
3396 }
3397
3398 static int gfx_v9_0_suspend(void *handle)
3399 {
3400         return gfx_v9_0_hw_fini(handle);
3401 }
3402
3403 static int gfx_v9_0_resume(void *handle)
3404 {
3405         return gfx_v9_0_hw_init(handle);
3406 }
3407
3408 static bool gfx_v9_0_is_idle(void *handle)
3409 {
3410         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3411
3412         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3413                                 GRBM_STATUS, GUI_ACTIVE))
3414                 return false;
3415         else
3416                 return true;
3417 }
3418
3419 static int gfx_v9_0_wait_for_idle(void *handle)
3420 {
3421         unsigned i;
3422         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3423
3424         for (i = 0; i < adev->usec_timeout; i++) {
3425                 if (gfx_v9_0_is_idle(handle))
3426                         return 0;
3427                 udelay(1);
3428         }
3429         return -ETIMEDOUT;
3430 }
3431
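/*
 * Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if any are found, stop
 * the RLC and CP and pulse the corresponding GRBM soft-reset bits.
 */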
3432 static int gfx_v9_0_soft_reset(void *handle)
3433 {
3434         u32 grbm_soft_reset = 0;
3435         u32 tmp;
3436         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3437
3438         /* GRBM_STATUS */
3439         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3440         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3441                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3442                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3443                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3444                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3445                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3446                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3447                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3448                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3449                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3450         }
3451
3452         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3453                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3455         }
3456
3457         /* GRBM_STATUS2 */
3458         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3459         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3460                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3461                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3462
3463
3464         if (grbm_soft_reset) {
3465                 /* stop the rlc */
3466                 adev->gfx.rlc.funcs->stop(adev);
3467
3468                 /* Disable GFX parsing/prefetching */
3469                 gfx_v9_0_cp_gfx_enable(adev, false);
3470
3471                 /* Disable MEC parsing/prefetching */
3472                 gfx_v9_0_cp_compute_enable(adev, false);
3473
3474                 if (grbm_soft_reset) {
3475                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3476                         tmp |= grbm_soft_reset;
3477                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3478                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3479                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3480
3481                         udelay(50);
3482
3483                         tmp &= ~grbm_soft_reset;
3484                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3485                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3486                 }
3487
3488                 /* Wait a little for things to settle down */
3489                 udelay(50);
3490         }
3491         return 0;
3492 }
3493
3494 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3495 {
3496         uint64_t clock;
3497
3498         mutex_lock(&adev->gfx.gpu_clock_mutex);
3499         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3500         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3501                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3502         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3503         return clock;
3504 }
3505
3506 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3507                                           uint32_t vmid,
3508                                           uint32_t gds_base, uint32_t gds_size,
3509                                           uint32_t gws_base, uint32_t gws_size,
3510                                           uint32_t oa_base, uint32_t oa_size)
3511 {
3512         struct amdgpu_device *adev = ring->adev;
3513
3514         /* GDS Base */
3515         gfx_v9_0_write_data_to_reg(ring, 0, false,
3516                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3517                                    gds_base);
3518
3519         /* GDS Size */
3520         gfx_v9_0_write_data_to_reg(ring, 0, false,
3521                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3522                                    gds_size);
3523
3524         /* GWS */
3525         gfx_v9_0_write_data_to_reg(ring, 0, false,
3526                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3527                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3528
3529         /* OA */
3530         gfx_v9_0_write_data_to_reg(ring, 0, false,
3531                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3532                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3533 }
3534
3535 static const u32 vgpr_init_compute_shader[] =
3536 {
3537         0xb07c0000, 0xbe8000ff,
3538         0x000000f8, 0xbf110800,
3539         0x7e000280, 0x7e020280,
3540         0x7e040280, 0x7e060280,
3541         0x7e080280, 0x7e0a0280,
3542         0x7e0c0280, 0x7e0e0280,
3543         0x80808800, 0xbe803200,
3544         0xbf84fff5, 0xbf9c0000,
3545         0xd28c0001, 0x0001007f,
3546         0xd28d0001, 0x0002027e,
3547         0x10020288, 0xb8810904,
3548         0xb7814000, 0xd1196a01,
3549         0x00000301, 0xbe800087,
3550         0xbefc00c1, 0xd89c4000,
3551         0x00020201, 0xd89cc080,
3552         0x00040401, 0x320202ff,
3553         0x00000800, 0x80808100,
3554         0xbf84fff8, 0x7e020280,
3555         0xbf810000, 0x00000000,
3556 };
3557
3558 static const u32 sgpr_init_compute_shader[] =
3559 {
3560         0xb07c0000, 0xbe8000ff,
3561         0x0000005f, 0xbee50080,
3562         0xbe812c65, 0xbe822c65,
3563         0xbe832c65, 0xbe842c65,
3564         0xbe852c65, 0xb77c0005,
3565         0x80808500, 0xbf84fff8,
3566         0xbe800080, 0xbf810000,
3567 };
3568
3569 static const struct soc15_reg_entry vgpr_init_regs[] = {
3570    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3571    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3572    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3573    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3574    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3575    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3576    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3580 };
3581
3582 static const struct soc15_reg_entry sgpr_init_regs[] = {
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3586    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3587    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3588    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3589    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3593 };
3594
3595 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3596    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3597    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3598    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3599    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3600    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3601    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3602    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3603    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3604    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3605    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3606    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3607    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3608    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3609    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3610    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3611    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3612    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3613    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3614    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3615    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3616    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3617    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3618    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3619    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3620    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3621    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3622    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3623    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3624    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3625    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3626    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3627 };
3628
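/*
 * RAS workaround: build an indirect buffer that dispatches two small compute
 * shaders touching the VGPRs and SGPRs, wait for it to complete, then read
 * back the EDC counter registers across SEs and instances to clear them.
 */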
3629 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3630 {
3631         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3632         struct amdgpu_ib ib;
3633         struct dma_fence *f = NULL;
3634         int r, i, j;
3635         u32 tmp;
3636         unsigned total_size, vgpr_offset, sgpr_offset;
3637         u64 gpu_addr;
3638
3639         /* only supported when RAS is enabled */
3640         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3641                 return 0;
3642
3643         /* bail if the compute ring is not ready */
3644         if (!ring->sched.ready)
3645                 return 0;
3646
3647         tmp = RREG32_SOC15(GC, 0, mmGB_EDC_MODE);
3648         WREG32_SOC15(GC, 0, mmGB_EDC_MODE, 0);
3649
3650         total_size =
3651                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3652         total_size +=
3653                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3654         total_size = ALIGN(total_size, 256);
3655         vgpr_offset = total_size;
3656         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3657         sgpr_offset = total_size;
3658         total_size += sizeof(sgpr_init_compute_shader);
3659
3660         /* allocate an indirect buffer to put the commands in */
3661         memset(&ib, 0, sizeof(ib));
3662         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3663         if (r) {
3664                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3665                 return r;
3666         }
3667
3668         /* load the compute shaders */
3669         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3670                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3671
3672         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3673                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3674
3675         /* init the ib length to 0 */
3676         ib.length_dw = 0;
3677
3678         /* VGPR */
3679         /* write the register state for the compute dispatch */
3680         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3681                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3682                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3683                                                                 - PACKET3_SET_SH_REG_START;
3684                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3685         }
3686         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3687         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3688         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3689         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3690                                                         - PACKET3_SET_SH_REG_START;
3691         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3692         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3693
3694         /* write dispatch packet */
3695         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3696         ib.ptr[ib.length_dw++] = 128; /* x */
3697         ib.ptr[ib.length_dw++] = 1; /* y */
3698         ib.ptr[ib.length_dw++] = 1; /* z */
3699         ib.ptr[ib.length_dw++] =
3700                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3701
3702         /* write CS partial flush packet */
3703         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3704         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3705
3706         /* SGPR */
3707         /* write the register state for the compute dispatch */
3708         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3709                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3710                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3711                                                                 - PACKET3_SET_SH_REG_START;
3712                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3713         }
3714         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3715         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3716         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3717         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3718                                                         - PACKET3_SET_SH_REG_START;
3719         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3720         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3721
3722         /* write dispatch packet */
3723         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3724         ib.ptr[ib.length_dw++] = 128; /* x */
3725         ib.ptr[ib.length_dw++] = 1; /* y */
3726         ib.ptr[ib.length_dw++] = 1; /* z */
3727         ib.ptr[ib.length_dw++] =
3728                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3729
3730         /* write CS partial flush packet */
3731         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3732         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3733
3734         /* schedule the ib on the ring */
3735         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3736         if (r) {
3737                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3738                 goto fail;
3739         }
3740
3741         /* wait for the GPU to finish processing the IB */
3742         r = dma_fence_wait(f, false);
3743         if (r) {
3744                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3745                 goto fail;
3746         }
3747
3748         /* read back registers to clear the counters */
3749         mutex_lock(&adev->grbm_idx_mutex);
3750         for (j = 0; j < 16; j++) {
3751                 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3752                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3753                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3754                 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3755                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3756                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3757                 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3758                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3759                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3760                 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3761                 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3762                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3763         }
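             /* restore GRBM_GFX_INDEX to broadcast mode (all SEs/SHs/instances) */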
3764         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3765         mutex_unlock(&adev->grbm_idx_mutex);
3766
3767 fail:
3768         amdgpu_ib_free(adev, &ib, NULL);
3769         dma_fence_put(f);
3770
3771         return r;
3772 }
3773
3774 static int gfx_v9_0_early_init(void *handle)
3775 {
3776         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3777
3778         adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3779         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3780         gfx_v9_0_set_ring_funcs(adev);
3781         gfx_v9_0_set_irq_funcs(adev);
3782         gfx_v9_0_set_gds_init(adev);
3783         gfx_v9_0_set_rlc_funcs(adev);
3784
3785         return 0;
3786 }
3787
3788 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3789                 struct amdgpu_iv_entry *entry);
3790
3791 static int gfx_v9_0_ecc_late_init(void *handle)
3792 {
3793         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3794         struct ras_common_if **ras_if = &adev->gfx.ras_if;
3795         struct ras_ih_if ih_info = {
3796                 .cb = gfx_v9_0_process_ras_data_cb,
3797         };
3798         struct ras_fs_if fs_info = {
3799                 .sysfs_name = "gfx_err_count",
3800                 .debugfs_name = "gfx_err_inject",
3801         };
3802         struct ras_common_if ras_block = {
3803                 .block = AMDGPU_RAS_BLOCK__GFX,
3804                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3805                 .sub_block_index = 0,
3806                 .name = "gfx",
3807         };
3808         int r;
3809
3810         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3811                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3812                 return 0;
3813         }
3814
3815         if (*ras_if)
3816                 goto resume;
3817
3818         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3819         if (!*ras_if)
3820                 return -ENOMEM;
3821
3822         /* requires IBs so do in late init after IB pool is initialized */
3823         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3824         if (r)
3825                 goto feature; /* free the as-yet-unregistered ras_if */
3826
3827         **ras_if = ras_block;
3828
3829         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3830         if (r) {
3831                 if (r == -EAGAIN) {
3832                         amdgpu_ras_request_reset_on_boot(adev,
3833                                         AMDGPU_RAS_BLOCK__GFX);
3834                         r = 0;
3835                 }
3836                 goto feature;
3837         }
3838
3839         ih_info.head = **ras_if;
3840         fs_info.head = **ras_if;
3841
3842         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3843         if (r)
3844                 goto interrupt;
3845
3846         r = amdgpu_ras_debugfs_create(adev, &fs_info);
3847         if (r)
3848                 goto debugfs;
3849
3850         r = amdgpu_ras_sysfs_create(adev, &fs_info);
3851         if (r)
3852                 goto sysfs;
3853 resume:
3854         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3855         if (r)
3856                 goto irq;
3857
3858         return 0;
3859 irq:
3860         amdgpu_ras_sysfs_remove(adev, *ras_if);
3861 sysfs:
3862         amdgpu_ras_debugfs_remove(adev, *ras_if);
3863 debugfs:
3864         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3865 interrupt:
3866         amdgpu_ras_feature_enable(adev, *ras_if, 0);
3867 feature:
3868         kfree(*ras_if);
3869         *ras_if = NULL;
3870         return r;
3871 }
3872
3873 static int gfx_v9_0_late_init(void *handle)
3874 {
3875         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3876         int r;
3877
3878         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3879         if (r)
3880                 return r;
3881
3882         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3883         if (r)
3884                 return r;
3885
3886         r = gfx_v9_0_ecc_late_init(handle);
3887         if (r)
3888                 return r;
3889
3890         return 0;
3891 }
3892
3893 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3894 {
3895         uint32_t rlc_setting;
3896
3897         /* if RLC is not enabled, do nothing */
3898         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3899         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3900                 return false;
3901
3902         return true;
3903 }
3904
3905 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3906 {
3907         uint32_t data;
3908         unsigned i;
3909
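             /* request entry into RLC safe mode; the RLC acks by clearing CMD */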
3910         data = RLC_SAFE_MODE__CMD_MASK;
3911         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3912         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3913
3914         /* wait for RLC_SAFE_MODE */
3915         for (i = 0; i < adev->usec_timeout; i++) {
3916                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3917                         break;
3918                 udelay(1);
3919         }
3920 }
3921
3922 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3923 {
3924         uint32_t data;
3925
3926         data = RLC_SAFE_MODE__CMD_MASK;
3927         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3928 }
3929
3930 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3931                                                 bool enable)
3932 {
3933         amdgpu_gfx_rlc_enter_safe_mode(adev);
3934
3935         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3936                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3937                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3938                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3939         } else {
3940                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3941                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3942         }
3943
3944         amdgpu_gfx_rlc_exit_safe_mode(adev);
3945 }
3946
3947 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3948                                                 bool enable)
3949 {
3950         /* TODO: double check if we need to perform under safe mode */
3951         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3952
3953         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3954                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3955         else
3956                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3957
3958         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3959                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3960         else
3961                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3962
3963         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3964 }
3965
3966 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3967                                                       bool enable)
3968 {
3969         uint32_t data, def;
3970
3971         amdgpu_gfx_rlc_enter_safe_mode(adev);
3972
3973         /* It is disabled by HW by default */
3974         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3975                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3976                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3977
3978                 if (adev->asic_type != CHIP_VEGA12)
3979                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
3980
3981                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3982                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3983                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3984
3985                 /* only for Vega10 & Raven1 */
3986                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3987
3988                 if (def != data)
3989                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3990
3991                 /* MGLS is a global flag to control all MGLS in GFX */
3992                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3993                         /* 2 - RLC memory Light sleep */
3994                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
3995                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3996                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3997                                 if (def != data)
3998                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3999                         }
4000                         /* 3 - CP memory Light sleep */
4001                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4002                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4003                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4004                                 if (def != data)
4005                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4006                         }
4007                 }
4008         } else {
4009                 /* 1 - MGCG_OVERRIDE */
4010                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4011
4012                 if (adev->asic_type != CHIP_VEGA12)
4013                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4014
4015                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4016                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4017                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4018                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4019
4020                 if (def != data)
4021                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4022
4023                 /* 2 - disable MGLS in RLC */
4024                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4025                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4026                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4027                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4028                 }
4029
4030                 /* 3 - disable MGLS in CP */
4031                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4032                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4033                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4034                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4035                 }
4036         }
4037
4038         amdgpu_gfx_rlc_exit_safe_mode(adev);
4039 }
4040
4041 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4042                                            bool enable)
4043 {
4044         uint32_t data, def;
4045
4046         amdgpu_gfx_rlc_enter_safe_mode(adev);
4047
4048         /* Enable 3D CGCG/CGLS */
4049         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4050                 /* write cmd to clear cgcg/cgls ov */
4051                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4052                 /* unset CGCG override */
4053                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4054                 /* update CGCG and CGLS override bits */
4055                 if (def != data)
4056                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4057
4058                 /* enable 3Dcgcg FSM(0x0000363f) */
4059                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4060
4061                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4062                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4063                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4064                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4065                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4066                 if (def != data)
4067                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4068
4069                 /* set IDLE_POLL_COUNT(0x00900100) */
4070                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4071                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4072                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4073                 if (def != data)
4074                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4075         } else {
4076                 /* Disable CGCG/CGLS */
4077                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4078                 /* disable cgcg, cgls should be disabled */
4079                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4080                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4081                 /* disable cgcg and cgls in FSM */
4082                 if (def != data)
4083                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4084         }
4085
4086         amdgpu_gfx_rlc_exit_safe_mode(adev);
4087 }
4088
4089 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4090                                                       bool enable)
4091 {
4092         uint32_t def, data;
4093
4094         amdgpu_gfx_rlc_enter_safe_mode(adev);
4095
4096         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4097                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4098                 /* unset CGCG override */
4099                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4100                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4101                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4102                 else
4103                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4104                 /* update CGCG and CGLS override bits */
4105                 if (def != data)
4106                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4107
4108                 /* enable cgcg FSM(0x0000363F) */
4109                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4110
4111                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4112                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4113                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4114                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4115                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4116                 if (def != data)
4117                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4118
4119                 /* set IDLE_POLL_COUNT(0x00900100) */
4120                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4121                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4122                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4123                 if (def != data)
4124                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4125         } else {
4126                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4127                 /* reset CGCG/CGLS bits */
4128                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4129                 /* disable cgcg and cgls in FSM */
4130                 if (def != data)
4131                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4132         }
4133
4134         amdgpu_gfx_rlc_exit_safe_mode(adev);
4135 }
4136
4137 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4138                                             bool enable)
4139 {
4140         if (enable) {
4141                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4142                  * ===  MGCG + MGLS ===
4143                  */
4144                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4145                 /* ===  CGCG /CGLS for GFX 3D Only === */
4146                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4147                 /* ===  CGCG + CGLS === */
4148                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4149         } else {
4150                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4151                  * ===  CGCG + CGLS ===
4152                  */
4153                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4154                 /* ===  CGCG /CGLS for GFX 3D Only === */
4155                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4156                 /* ===  MGCG + MGLS === */
4157                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4158         }
4159         return 0;
4160 }
4161
4162 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4163         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4164         .set_safe_mode = gfx_v9_0_set_safe_mode,
4165         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4166         .init = gfx_v9_0_rlc_init,
4167         .get_csb_size = gfx_v9_0_get_csb_size,
4168         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4169         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4170         .resume = gfx_v9_0_rlc_resume,
4171         .stop = gfx_v9_0_rlc_stop,
4172         .reset = gfx_v9_0_rlc_reset,
4173         .start = gfx_v9_0_rlc_start
4174 };
4175
4176 static int gfx_v9_0_set_powergating_state(void *handle,
4177                                           enum amd_powergating_state state)
4178 {
4179         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4180         bool enable = (state == AMD_PG_STATE_GATE);
4181
4182         switch (adev->asic_type) {
4183         case CHIP_RAVEN:
4184                 if (!enable) {
4185                         amdgpu_gfx_off_ctrl(adev, false);
4186                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4187                 }
4188                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4189                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4190                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4191                 } else {
4192                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4193                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4194                 }
4195
4196                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4197                         gfx_v9_0_enable_cp_power_gating(adev, true);
4198                 else
4199                         gfx_v9_0_enable_cp_power_gating(adev, false);
4200
4201                 /* update gfx cgpg state */
4202                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4203
4204                 /* update mgcg state */
4205                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4206
4207                 if (enable)
4208                         amdgpu_gfx_off_ctrl(adev, true);
4209                 break;
4210         case CHIP_VEGA12:
4211                 if (!enable) {
4212                         amdgpu_gfx_off_ctrl(adev, false);
4213                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4214                 } else {
4215                         amdgpu_gfx_off_ctrl(adev, true);
4216                 }
4217                 break;
4218         default:
4219                 break;
4220         }
4221
4222         return 0;
4223 }
4224
4225 static int gfx_v9_0_set_clockgating_state(void *handle,
4226                                           enum amd_clockgating_state state)
4227 {
4228         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4229
4230         if (amdgpu_sriov_vf(adev))
4231                 return 0;
4232
4233         switch (adev->asic_type) {
4234         case CHIP_VEGA10:
4235         case CHIP_VEGA12:
4236         case CHIP_VEGA20:
4237         case CHIP_RAVEN:
4238                 gfx_v9_0_update_gfx_clock_gating(adev,
4239                                                  state == AMD_CG_STATE_GATE);
4240                 break;
4241         default:
4242                 break;
4243         }
4244         return 0;
4245 }
4246
4247 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4248 {
4249         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4250         int data;
4251
4252         if (amdgpu_sriov_vf(adev))
4253                 *flags = 0;
4254
4255         /* AMD_CG_SUPPORT_GFX_MGCG */
4256         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4257         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4258                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4259
4260         /* AMD_CG_SUPPORT_GFX_CGCG */
4261         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4262         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4263                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4264
4265         /* AMD_CG_SUPPORT_GFX_CGLS */
4266         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4267                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4268
4269         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4270         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4271         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4272                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4273
4274         /* AMD_CG_SUPPORT_GFX_CP_LS */
4275         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4276         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4277                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4278
4279         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4280         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4281         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4282                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4283
4284         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4285         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4286                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4287 }
4288
4289 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4290 {
4291         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4292 }
4293
4294 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4295 {
4296         struct amdgpu_device *adev = ring->adev;
4297         u64 wptr;
4298
4299         /* XXX check if swapping is necessary on BE */
4300         if (ring->use_doorbell) {
4301                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4302         } else {
4303                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4304                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4305         }
4306
4307         return wptr;
4308 }
4309
4310 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4311 {
4312         struct amdgpu_device *adev = ring->adev;
4313
4314         if (ring->use_doorbell) {
4315                 /* XXX check if swapping is necessary on BE */
4316                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4317                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4318         } else {
4319                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4320                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4321         }
4322 }
4323
4324 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4325 {
4326         struct amdgpu_device *adev = ring->adev;
4327         u32 ref_and_mask, reg_mem_engine;
4328         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4329
4330         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4331                 switch (ring->me) {
4332                 case 1:
4333                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4334                         break;
4335                 case 2:
4336                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4337                         break;
4338                 default:
4339                         return;
4340                 }
4341                 reg_mem_engine = 0;
4342         } else {
4343                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4344                 reg_mem_engine = 1; /* pfp */
4345         }
4346
4347         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4348                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4349                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4350                               ref_and_mask, ref_and_mask, 0x20);
4351 }
4352
4353 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4354                                         struct amdgpu_job *job,
4355                                         struct amdgpu_ib *ib,
4356                                         uint32_t flags)
4357 {
4358         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4359         u32 header, control = 0;
4360
4361         if (ib->flags & AMDGPU_IB_FLAG_CE)
4362                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4363         else
4364                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4365
4366         control |= ib->length_dw | (vmid << 24);
4367
4368         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4369                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4370
4371                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4372                         gfx_v9_0_ring_emit_de_meta(ring);
4373         }
4374
4375         amdgpu_ring_write(ring, header);
4376         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4377         amdgpu_ring_write(ring,
4378 #ifdef __BIG_ENDIAN
4379                 (2 << 0) |
4380 #endif
4381                 lower_32_bits(ib->gpu_addr));
4382         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4383         amdgpu_ring_write(ring, control);
4384 }
4385
4386 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4387                                           struct amdgpu_job *job,
4388                                           struct amdgpu_ib *ib,
4389                                           uint32_t flags)
4390 {
4391         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4392         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4393
4394         /* Currently, there is a high possibility to get wave ID mismatch
4395          * between ME and GDS, leading to a hw deadlock, because ME generates
4396          * different wave IDs than the GDS expects. This situation happens
4397          * randomly when at least 5 compute pipes use GDS ordered append.
4398          * The wave IDs generated by ME are also wrong after suspend/resume.
4399          * Those are probably bugs somewhere else in the kernel driver.
4400          *
4401          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4402          * GDS to 0 for this ring (me/pipe).
4403          */
4404         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4405                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4406                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4407                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4408         }
4409
4410         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4411         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4412         amdgpu_ring_write(ring,
4413 #ifdef __BIG_ENDIAN
4414                                 (2 << 0) |
4415 #endif
4416                                 lower_32_bits(ib->gpu_addr));
4417         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4418         amdgpu_ring_write(ring, control);
4419 }
4420
4421 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4422                                      u64 seq, unsigned flags)
4423 {
4424         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4425         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4426         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4427
4428         /* RELEASE_MEM - flush caches, send int */
4429         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4430         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4431                                                EOP_TC_NC_ACTION_EN) :
4432                                               (EOP_TCL1_ACTION_EN |
4433                                                EOP_TC_ACTION_EN |
4434                                                EOP_TC_WB_ACTION_EN |
4435                                                EOP_TC_MD_ACTION_EN)) |
4436                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4437                                  EVENT_INDEX(5)));
4438         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4439
4440         /*
4441          * the address should be Qword aligned if 64bit write, Dword
4442          * aligned if only send 32bit data low (discard data high)
4443          */
4444         if (write64bit)
4445                 BUG_ON(addr & 0x7);
4446         else
4447                 BUG_ON(addr & 0x3);
4448         amdgpu_ring_write(ring, lower_32_bits(addr));
4449         amdgpu_ring_write(ring, upper_32_bits(addr));
4450         amdgpu_ring_write(ring, lower_32_bits(seq));
4451         amdgpu_ring_write(ring, upper_32_bits(seq));
4452         amdgpu_ring_write(ring, 0);
4453 }
4454
4455 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4456 {
4457         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4458         uint32_t seq = ring->fence_drv.sync_seq;
4459         uint64_t addr = ring->fence_drv.gpu_addr;
4460
4461         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4462                               lower_32_bits(addr), upper_32_bits(addr),
4463                               seq, 0xffffffff, 4);
4464 }
4465
4466 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4467                                         unsigned vmid, uint64_t pd_addr)
4468 {
4469         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4470
4471         /* compute doesn't have PFP */
4472         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4473                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4474                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4475                 amdgpu_ring_write(ring, 0x0);
4476         }
4477 }
4478
4479 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4480 {
4481         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4482 }
4483
4484 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4485 {
4486         u64 wptr;
4487
4488         /* XXX check if swapping is necessary on BE */
4489         if (ring->use_doorbell)
4490                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4491         else
4492                 BUG();
4493         return wptr;
4494 }
4495
4496 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4497                                            bool acquire)
4498 {
4499         struct amdgpu_device *adev = ring->adev;
4500         int pipe_num, tmp, reg;
4501         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4502
4503         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4504
4505         /* first me only has 2 entries, GFX and HP3D */
4506         if (ring->me > 0)
4507                 pipe_num -= 2;
4508
4509         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4510         tmp = RREG32(reg);
4511         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4512         WREG32(reg, tmp);
4513 }
4514
4515 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4516                                             struct amdgpu_ring *ring,
4517                                             bool acquire)
4518 {
4519         int i, pipe;
4520         bool reserve;
4521         struct amdgpu_ring *iring;
4522
4523         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4524         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4525         if (acquire)
4526                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4527         else
4528                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4529
4530         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4531                 /* Clear all reservations - everyone reacquires all resources */
4532                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4533                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4534                                                        true);
4535
4536                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4537                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4538                                                        true);
4539         } else {
4540                 /* Lower all pipes without a current reservation */
4541                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4542                         iring = &adev->gfx.gfx_ring[i];
4543                         pipe = amdgpu_gfx_queue_to_bit(adev,
4544                                                        iring->me,
4545                                                        iring->pipe,
4546                                                        0);
4547                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4548                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4549                 }
4550
4551                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4552                         iring = &adev->gfx.compute_ring[i];
4553                         pipe = amdgpu_gfx_queue_to_bit(adev,
4554                                                        iring->me,
4555                                                        iring->pipe,
4556                                                        0);
4557                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4558                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4559                 }
4560         }
4561
4562         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4563 }
4564
4565 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4566                                       struct amdgpu_ring *ring,
4567                                       bool acquire)
4568 {
4569         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4570         uint32_t queue_priority = acquire ? 0xf : 0x0;
4571
4572         mutex_lock(&adev->srbm_mutex);
4573         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4574
4575         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4576         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4577
4578         soc15_grbm_select(adev, 0, 0, 0, 0);
4579         mutex_unlock(&adev->srbm_mutex);
4580 }
4581
4582 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4583                                                enum drm_sched_priority priority)
4584 {
4585         struct amdgpu_device *adev = ring->adev;
4586         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4587
4588         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4589                 return;
4590
4591         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4592         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4593 }
4594
4595 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4596 {
4597         struct amdgpu_device *adev = ring->adev;
4598
4599         /* XXX check if swapping is necessary on BE */
4600         if (ring->use_doorbell) {
4601                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4602                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4603         } else {
4604                 BUG(); /* only DOORBELL method supported on gfx9 now */
4605         }
4606 }
4607
4608 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4609                                          u64 seq, unsigned int flags)
4610 {
4611         struct amdgpu_device *adev = ring->adev;
4612
4613         /* we only allocate 32bit for each seq wb address */
4614         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4615
4616         /* write fence seq to the "addr" */
4617         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4618         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4619                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4620         amdgpu_ring_write(ring, lower_32_bits(addr));
4621         amdgpu_ring_write(ring, upper_32_bits(addr));
4622         amdgpu_ring_write(ring, lower_32_bits(seq));
4623
4624         if (flags & AMDGPU_FENCE_FLAG_INT) {
4625                 /* set register to trigger INT */
4626                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4627                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4628                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4629                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4630                 amdgpu_ring_write(ring, 0);
4631                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4632         }
4633 }
4634
4635 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4636 {
4637         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4638         amdgpu_ring_write(ring, 0);
4639 }
4640
4641 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4642 {
4643         struct v9_ce_ib_state ce_payload = {0};
4644         uint64_t csa_addr;
4645         int cnt;
4646
4647         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4648         csa_addr = amdgpu_csa_vaddr(ring->adev);
4649
4650         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4651         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4652                                  WRITE_DATA_DST_SEL(8) |
4653                                  WR_CONFIRM) |
4654                                  WRITE_DATA_CACHE_POLICY(0));
4655         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4656         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4657         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4658 }
4659
4660 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4661 {
4662         struct v9_de_ib_state de_payload = {0};
4663         uint64_t csa_addr, gds_addr;
4664         int cnt;
4665
4666         csa_addr = amdgpu_csa_vaddr(ring->adev);
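             /* point the GDS backup area at the 4 KiB region following the CSA */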
4667         gds_addr = csa_addr + 4096;
4668         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4669         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4670
4671         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4672         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4673         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4674                                  WRITE_DATA_DST_SEL(8) |
4675                                  WR_CONFIRM) |
4676                                  WRITE_DATA_CACHE_POLICY(0));
4677         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4678         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4679         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4680 }
4681
4682 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4683 {
4684         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4685         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0 = frame_begin, 1 = frame_end */
4686 }
4687
4688 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4689 {
4690         uint32_t dw2 = 0;
4691
4692         if (amdgpu_sriov_vf(ring->adev))
4693                 gfx_v9_0_ring_emit_ce_meta(ring);
4694
4695         gfx_v9_0_ring_emit_tmz(ring, true);
4696
4697         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4698         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4699                 /* set load_global_config & load_global_uconfig */
4700                 dw2 |= 0x8001;
4701                 /* set load_cs_sh_regs */
4702                 dw2 |= 0x01000000;
4703                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4704                 dw2 |= 0x10002;
4705
4706                 /* set load_ce_ram if a preamble is present */
4707                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4708                         dw2 |= 0x10000000;
4709         } else {
4710                 /* still load_ce_ram if this is the first time the preamble is
4711                  * presented, even though no context switch happens.
4712                  */
4713                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4714                         dw2 |= 0x10000000;
4715         }
4716
4717         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4718         amdgpu_ring_write(ring, dw2);
4719         amdgpu_ring_write(ring, 0);
4720 }
4721
4722 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4723 {
4724         unsigned ret;
4725         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4726         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4727         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4728         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4729         ret = ring->wptr & ring->buf_mask;
4730         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4731         return ret;
4732 }
4733
4734 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4735 {
4736         unsigned cur;
4737         BUG_ON(offset > ring->buf_mask);
4738         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4739
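             /* patch in the number of dwords to skip, accounting for ring wrap-around */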
4740         cur = (ring->wptr & ring->buf_mask) - 1;
4741         if (likely(cur > offset))
4742                 ring->ring[offset] = cur - offset;
4743         else
4744                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4745 }
4746
4747 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4748 {
4749         struct amdgpu_device *adev = ring->adev;
4750
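             /* COPY_DATA the register value into the writeback slot at virt.reg_val_offs */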
4751         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4752         amdgpu_ring_write(ring, 0 |     /* src: register*/
4753                                 (5 << 8) |      /* dst: memory */
4754                                 (1 << 20));     /* write confirm */
4755         amdgpu_ring_write(ring, reg);
4756         amdgpu_ring_write(ring, 0);
4757         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4758                                 adev->virt.reg_val_offs * 4));
4759         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4760                                 adev->virt.reg_val_offs * 4));
4761 }
4762
4763 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4764                                     uint32_t val)
4765 {
4766         uint32_t cmd = 0;
4767
4768         switch (ring->funcs->type) {
4769         case AMDGPU_RING_TYPE_GFX:
4770                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4771                 break;
4772         case AMDGPU_RING_TYPE_KIQ:
4773                 cmd = (1 << 16); /* no inc addr */
4774                 break;
4775         default:
4776                 cmd = WR_CONFIRM;
4777                 break;
4778         }
4779         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4780         amdgpu_ring_write(ring, cmd);
4781         amdgpu_ring_write(ring, reg);
4782         amdgpu_ring_write(ring, 0);
4783         amdgpu_ring_write(ring, val);
4784 }
4785
4786 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4787                                         uint32_t val, uint32_t mask)
4788 {
4789         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4790 }
4791
4792 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4793                                                   uint32_t reg0, uint32_t reg1,
4794                                                   uint32_t ref, uint32_t mask)
4795 {
4796         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4797         struct amdgpu_device *adev = ring->adev;
4798         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4799                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4800
4801         if (fw_version_ok)
4802                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4803                                       ref, mask, 0x20);
4804         else
4805                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4806                                                            ref, mask);
4807 }
4808
4809 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4810 {
4811         struct amdgpu_device *adev = ring->adev;
4812         uint32_t value = 0;
4813
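             /* broadcast an SQ command (presumably a wave kill) to the waves of the
              * given VMID to soft-recover the hang
              */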
4814         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4815         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4816         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4817         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4818         WREG32(mmSQ_CMD, value);
4819 }
4820
4821 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4822                                                  enum amdgpu_interrupt_state state)
4823 {
4824         switch (state) {
4825         case AMDGPU_IRQ_STATE_DISABLE:
4826         case AMDGPU_IRQ_STATE_ENABLE:
4827                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4828                                TIME_STAMP_INT_ENABLE,
4829                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4830                 break;
4831         default:
4832                 break;
4833         }
4834 }
4835
4836 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4837                                                      int me, int pipe,
4838                                                      enum amdgpu_interrupt_state state)
4839 {
4840         u32 mec_int_cntl, mec_int_cntl_reg;
4841
4842         /*
4843          * amdgpu controls only the first MEC. That's why this function only
4844          * handles the setting of interrupts for this specific MEC. All other
4845          * pipes' interrupts are set by amdkfd.
4846          */
4847
4848         if (me == 1) {
4849                 switch (pipe) {
4850                 case 0:
4851                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4852                         break;
4853                 case 1:
4854                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4855                         break;
4856                 case 2:
4857                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4858                         break;
4859                 case 3:
4860                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4861                         break;
4862                 default:
4863                         DRM_DEBUG("invalid pipe %d\n", pipe);
4864                         return;
4865                 }
4866         } else {
4867                 DRM_DEBUG("invalid me %d\n", me);
4868                 return;
4869         }
4870
4871         switch (state) {
4872         case AMDGPU_IRQ_STATE_DISABLE:
4873                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4874                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4875                                              TIME_STAMP_INT_ENABLE, 0);
4876                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4877                 break;
4878         case AMDGPU_IRQ_STATE_ENABLE:
4879                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4880                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4881                                              TIME_STAMP_INT_ENABLE, 1);
4882                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4883                 break;
4884         default:
4885                 break;
4886         }
4887 }
4888
4889 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4890                                              struct amdgpu_irq_src *source,
4891                                              unsigned type,
4892                                              enum amdgpu_interrupt_state state)
4893 {
4894         switch (state) {
4895         case AMDGPU_IRQ_STATE_DISABLE:
4896         case AMDGPU_IRQ_STATE_ENABLE:
4897                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4898                                PRIV_REG_INT_ENABLE,
4899                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4900                 break;
4901         default:
4902                 break;
4903         }
4904
4905         return 0;
4906 }
4907
4908 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4909                                               struct amdgpu_irq_src *source,
4910                                               unsigned type,
4911                                               enum amdgpu_interrupt_state state)
4912 {
4913         switch (state) {
4914         case AMDGPU_IRQ_STATE_DISABLE:
4915         case AMDGPU_IRQ_STATE_ENABLE:
4916                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4917                                PRIV_INSTR_INT_ENABLE,
4918                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
4919         default:
4920                 break;
4921         }
4922
4923         return 0;
4924 }
4925
4926 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
4927         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4928                         CP_ECC_ERROR_INT_ENABLE, 1)
4929
4930 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
4931         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4932                         CP_ECC_ERROR_INT_ENABLE, 0)
4933
4934 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4935                                               struct amdgpu_irq_src *source,
4936                                               unsigned type,
4937                                               enum amdgpu_interrupt_state state)
4938 {
4939         switch (state) {
4940         case AMDGPU_IRQ_STATE_DISABLE:
4941                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4942                                 CP_ECC_ERROR_INT_ENABLE, 0);
4943                 DISABLE_ECC_ON_ME_PIPE(1, 0);
4944                 DISABLE_ECC_ON_ME_PIPE(1, 1);
4945                 DISABLE_ECC_ON_ME_PIPE(1, 2);
4946                 DISABLE_ECC_ON_ME_PIPE(1, 3);
4947                 break;
4948
4949         case AMDGPU_IRQ_STATE_ENABLE:
4950                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4951                                 CP_ECC_ERROR_INT_ENABLE, 1);
4952                 ENABLE_ECC_ON_ME_PIPE(1, 0);
4953                 ENABLE_ECC_ON_ME_PIPE(1, 1);
4954                 ENABLE_ECC_ON_ME_PIPE(1, 2);
4955                 ENABLE_ECC_ON_ME_PIPE(1, 3);
4956                 break;
4957         default:
4958                 break;
4959         }
4960
4961         return 0;
4962 }
4963
4964
4965 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4966                                             struct amdgpu_irq_src *src,
4967                                             unsigned type,
4968                                             enum amdgpu_interrupt_state state)
4969 {
4970         switch (type) {
4971         case AMDGPU_CP_IRQ_GFX_EOP:
4972                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4973                 break;
4974         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4975                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4976                 break;
4977         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4978                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4979                 break;
4980         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4981                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4982                 break;
4983         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4984                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4985                 break;
4986         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4987                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4988                 break;
4989         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4990                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4991                 break;
4992         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4993                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4994                 break;
4995         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4996                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4997                 break;
4998         default:
4999                 break;
5000         }
5001         return 0;
5002 }
5003
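     /* EOP interrupt handler: decode me/pipe/queue from the IV ring_id
      * (me_id in bits 3:2, pipe_id in bits 1:0, queue_id in bits 6:4) and
      * process fences on the gfx ring or on the matching compute ring.
      */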
5004 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5005                             struct amdgpu_irq_src *source,
5006                             struct amdgpu_iv_entry *entry)
5007 {
5008         int i;
5009         u8 me_id, pipe_id, queue_id;
5010         struct amdgpu_ring *ring;
5011
5012         DRM_DEBUG("IH: CP EOP\n");
5013         me_id = (entry->ring_id & 0x0c) >> 2;
5014         pipe_id = (entry->ring_id & 0x03) >> 0;
5015         queue_id = (entry->ring_id & 0x70) >> 4;
5016
5017         switch (me_id) {
5018         case 0:
5019                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5020                 break;
5021         case 1:
5022         case 2:
5023                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5024                         ring = &adev->gfx.compute_ring[i];
5025                         /* Per-queue interrupt is supported for MEC starting from VI.
5026                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5027                          */
5028                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5029                                 amdgpu_fence_process(ring);
5030                 }
5031                 break;
5032         }
5033         return 0;
5034 }
5035
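     /* Common fault path for the priv_reg/priv_inst interrupts: decode the
      * ring from the IV entry the same way as the EOP handler and signal a
      * scheduler fault on it.
      */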
5036 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5037                            struct amdgpu_iv_entry *entry)
5038 {
5039         u8 me_id, pipe_id, queue_id;
5040         struct amdgpu_ring *ring;
5041         int i;
5042
5043         me_id = (entry->ring_id & 0x0c) >> 2;
5044         pipe_id = (entry->ring_id & 0x03) >> 0;
5045         queue_id = (entry->ring_id & 0x70) >> 4;
5046
5047         switch (me_id) {
5048         case 0:
5049                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5050                 break;
5051         case 1:
5052         case 2:
5053                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5054                         ring = &adev->gfx.compute_ring[i];
5055                         if (ring->me == me_id && ring->pipe == pipe_id &&
5056                             ring->queue == queue_id)
5057                                 drm_sched_fault(&ring->sched);
5058                 }
5059                 break;
5060         }
5061 }
5062
5063 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5064                                  struct amdgpu_irq_src *source,
5065                                  struct amdgpu_iv_entry *entry)
5066 {
5067         DRM_ERROR("Illegal register access in command stream\n");
5068         gfx_v9_0_fault(adev, entry);
5069         return 0;
5070 }
5071
5072 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5073                                   struct amdgpu_irq_src *source,
5074                                   struct amdgpu_iv_entry *entry)
5075 {
5076         DRM_ERROR("Illegal instruction in command stream\n");
5077         gfx_v9_0_fault(adev, entry);
5078         return 0;
5079 }
5080
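     /* RAS error callback for the gfx block: flag the SRAM ECC error to KFD,
      * request a GPU reset and report the event as an uncorrectable error.
      */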
5081 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5082                 struct amdgpu_iv_entry *entry)
5083 {
5084         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5085         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5086         amdgpu_ras_reset_gpu(adev, 0);
5087         return AMDGPU_RAS_UE;
5088 }
5089
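     /* CP ECC error interrupt handler: if a RAS block has been registered
      * for gfx, forward the IV entry to the RAS interrupt dispatcher.
      */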
5090 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5091                                   struct amdgpu_irq_src *source,
5092                                   struct amdgpu_iv_entry *entry)
5093 {
5094         struct ras_common_if *ras_if = adev->gfx.ras_if;
5095         struct ras_dispatch_if ih_data = {
5096                 .entry = entry,
5097         };
5098
5099         if (!ras_if)
5100                 return 0;
5101
5102         ih_data.head = *ras_if;
5103
5104         DRM_ERROR("CP ECC ERROR IRQ\n");
5105         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5106         return 0;
5107 }
5108
5109 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5110         .name = "gfx_v9_0",
5111         .early_init = gfx_v9_0_early_init,
5112         .late_init = gfx_v9_0_late_init,
5113         .sw_init = gfx_v9_0_sw_init,
5114         .sw_fini = gfx_v9_0_sw_fini,
5115         .hw_init = gfx_v9_0_hw_init,
5116         .hw_fini = gfx_v9_0_hw_fini,
5117         .suspend = gfx_v9_0_suspend,
5118         .resume = gfx_v9_0_resume,
5119         .is_idle = gfx_v9_0_is_idle,
5120         .wait_for_idle = gfx_v9_0_wait_for_idle,
5121         .soft_reset = gfx_v9_0_soft_reset,
5122         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5123         .set_powergating_state = gfx_v9_0_set_powergating_state,
5124         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
5125 };
5126
5127 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5128         .type = AMDGPU_RING_TYPE_GFX,
5129         .align_mask = 0xff,
5130         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5131         .support_64bit_ptrs = true,
5132         .vmhub = AMDGPU_GFXHUB,
5133         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5134         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5135         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5136         .emit_frame_size = /* totally 242 maximum if 16 IBs */
5137                 5 +  /* COND_EXEC */
5138                 7 +  /* PIPELINE_SYNC */
5139                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5140                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5141                 2 + /* VM_FLUSH */
5142                 8 +  /* FENCE for VM_FLUSH */
5143                 20 + /* GDS switch */
5144                 4 + /* double SWITCH_BUFFER,
5145                        the first COND_EXEC jumps to the place just
5146                        prior to this double SWITCH_BUFFER */
5147                 5 + /* COND_EXEC */
5148                 7 + /* HDP_flush */
5149                 4 + /* VGT_flush */
5150                 14 + /* CE_META */
5151                 31 + /* DE_META */
5152                 3 + /* CNTX_CTRL */
5153                 5 + /* HDP_INVL */
5154                 8 + 8 + /* FENCE x2 */
5155                 2, /* SWITCH_BUFFER */
5156         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5157         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5158         .emit_fence = gfx_v9_0_ring_emit_fence,
5159         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5160         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5161         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5162         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5163         .test_ring = gfx_v9_0_ring_test_ring,
5164         .test_ib = gfx_v9_0_ring_test_ib,
5165         .insert_nop = amdgpu_ring_insert_nop,
5166         .pad_ib = amdgpu_ring_generic_pad_ib,
5167         .emit_switch_buffer = gfx_v9_ring_emit_sb,
5168         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5169         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5170         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5171         .emit_tmz = gfx_v9_0_ring_emit_tmz,
5172         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5173         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5174         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5175         .soft_recovery = gfx_v9_0_ring_soft_recovery,
5176 };
5177
5178 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5179         .type = AMDGPU_RING_TYPE_COMPUTE,
5180         .align_mask = 0xff,
5181         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5182         .support_64bit_ptrs = true,
5183         .vmhub = AMDGPU_GFXHUB,
5184         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5185         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5186         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5187         .emit_frame_size =
5188                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5189                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5190                 5 + /* hdp invalidate */
5191                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5192                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5193                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5194                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5195                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5196         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5197         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5198         .emit_fence = gfx_v9_0_ring_emit_fence,
5199         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5200         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5201         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5202         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5203         .test_ring = gfx_v9_0_ring_test_ring,
5204         .test_ib = gfx_v9_0_ring_test_ib,
5205         .insert_nop = amdgpu_ring_insert_nop,
5206         .pad_ib = amdgpu_ring_generic_pad_ib,
5207         .set_priority = gfx_v9_0_ring_set_priority_compute,
5208         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5209         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5210         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5211 };
5212
5213 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5214         .type = AMDGPU_RING_TYPE_KIQ,
5215         .align_mask = 0xff,
5216         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5217         .support_64bit_ptrs = true,
5218         .vmhub = AMDGPU_GFXHUB,
5219         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5220         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5221         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5222         .emit_frame_size =
5223                 20 + /* gfx_v9_0_ring_emit_gds_switch */
5224                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
5225                 5 + /* hdp invalidate */
5226                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5227                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5228                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5229                 2 + /* gfx_v9_0_ring_emit_vm_flush */
5230                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5231         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
5232         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5233         .test_ring = gfx_v9_0_ring_test_ring,
5234         .insert_nop = amdgpu_ring_insert_nop,
5235         .pad_ib = amdgpu_ring_generic_pad_ib,
5236         .emit_rreg = gfx_v9_0_ring_emit_rreg,
5237         .emit_wreg = gfx_v9_0_ring_emit_wreg,
5238         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5239         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5240 };
5241
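     /* Install the ring function tables: the KIQ ring, the gfx rings and
      * the compute rings each get their own amdgpu_ring_funcs.
      */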
5242 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5243 {
5244         int i;
5245
5246         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5247
5248         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5249                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5250
5251         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5252                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5253 }
5254
5255 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5256         .set = gfx_v9_0_set_eop_interrupt_state,
5257         .process = gfx_v9_0_eop_irq,
5258 };
5259
5260 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5261         .set = gfx_v9_0_set_priv_reg_fault_state,
5262         .process = gfx_v9_0_priv_reg_irq,
5263 };
5264
5265 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5266         .set = gfx_v9_0_set_priv_inst_fault_state,
5267         .process = gfx_v9_0_priv_inst_irq,
5268 };
5269
5270 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5271         .set = gfx_v9_0_set_cp_ecc_error_state,
5272         .process = gfx_v9_0_cp_ecc_error_irq,
5273 };
5274
5275
5276 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5277 {
5278         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5279         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5280
5281         adev->gfx.priv_reg_irq.num_types = 1;
5282         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5283
5284         adev->gfx.priv_inst_irq.num_types = 1;
5285         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5286
5287         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
5288         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5289 }
5290
5291 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5292 {
5293         switch (adev->asic_type) {
5294         case CHIP_VEGA10:
5295         case CHIP_VEGA12:
5296         case CHIP_VEGA20:
5297         case CHIP_RAVEN:
5298                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5299                 break;
5300         default:
5301                 break;
5302         }
5303 }
5304
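     /* Set up the per-ASIC GDS parameters: total GDS memory (64KB on Vega,
      * 4KB on Raven), the maximum GDS compute wave id, and the GWS/OA sizes
      * plus the gfx/CS partition sizes derived from the total GDS size.
      */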
5305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5306 {
5307         /* init asic gds info */
5308         switch (adev->asic_type) {
5309         case CHIP_VEGA10:
5310         case CHIP_VEGA12:
5311         case CHIP_VEGA20:
5312                 adev->gds.mem.total_size = 0x10000;
5313                 break;
5314         case CHIP_RAVEN:
5315                 adev->gds.mem.total_size = 0x1000;
5316                 break;
5317         default:
5318                 adev->gds.mem.total_size = 0x10000;
5319                 break;
5320         }
5321
5322         switch (adev->asic_type) {
5323         case CHIP_VEGA10:
5324         case CHIP_VEGA20:
5325                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5326                 break;
5327         case CHIP_VEGA12:
5328                 adev->gds.gds_compute_max_wave_id = 0x27f;
5329                 break;
5330         case CHIP_RAVEN:
5331                 if (adev->rev_id >= 0x8)
5332                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5333                 else
5334                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5335                 break;
5336         default:
5337                 /* this really depends on the chip */
5338                 adev->gds.gds_compute_max_wave_id = 0x7ff;
5339                 break;
5340         }
5341
5342         adev->gds.gws.total_size = 64;
5343         adev->gds.oa.total_size = 16;
5344
5345         if (adev->gds.mem.total_size == 64 * 1024) {
5346                 adev->gds.mem.gfx_partition_size = 4096;
5347                 adev->gds.mem.cs_partition_size = 4096;
5348
5349                 adev->gds.gws.gfx_partition_size = 4;
5350                 adev->gds.gws.cs_partition_size = 4;
5351
5352                 adev->gds.oa.gfx_partition_size = 4;
5353                 adev->gds.oa.cs_partition_size = 1;
5354         } else {
5355                 adev->gds.mem.gfx_partition_size = 1024;
5356                 adev->gds.mem.cs_partition_size = 1024;
5357
5358                 adev->gds.gws.gfx_partition_size = 16;
5359                 adev->gds.gws.cs_partition_size = 16;
5360
5361                 adev->gds.oa.gfx_partition_size = 4;
5362                 adev->gds.oa.cs_partition_size = 4;
5363         }
5364 }
5365
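     /* Program the user-requested inactive-CU mask into
      * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
      */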
5366 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5367                                                  u32 bitmap)
5368 {
5369         u32 data;
5370
5371         if (!bitmap)
5372                 return;
5373
5374         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5375         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5376
5377         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5378 }
5379
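     /* Return the active-CU bitmap for the currently selected SE/SH: CUs
      * that are neither fused off (CC_GC_SHADER_ARRAY_CONFIG) nor
      * user-disabled (GC_USER_SHADER_ARRAY_CONFIG), masked to max_cu_per_sh.
      */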
5380 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5381 {
5382         u32 data, mask;
5383
5384         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5385         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5386
5387         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5388         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5389
5390         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5391
5392         return (~data) & mask;
5393 }
5394
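     /* Walk every SE/SH: apply the requested CU disable masks
      * (amdgpu_gfx_parse_disable_cu), record the active-CU bitmap per SH,
      * count active CUs and build the aggregate always-on (AO) CU mask.
      */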
5395 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5396                                  struct amdgpu_cu_info *cu_info)
5397 {
5398         int i, j, k, counter, active_cu_number = 0;
5399         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5400         unsigned disable_masks[4 * 2];
5401
5402         if (!adev || !cu_info)
5403                 return -EINVAL;
5404
5405         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5406
5407         mutex_lock(&adev->grbm_idx_mutex);
5408         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5409                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5410                         mask = 1;
5411                         ao_bitmap = 0;
5412                         counter = 0;
5413                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5414                         if (i < 4 && j < 2)
5415                                 gfx_v9_0_set_user_cu_inactive_bitmap(
5416                                         adev, disable_masks[i * 2 + j]);
5417                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5418                         cu_info->bitmap[i][j] = bitmap;
5419
5420                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5421                                 if (bitmap & mask) {
5422                                         if (counter < adev->gfx.config.max_cu_per_sh)
5423                                                 ao_bitmap |= mask;
5424                                         counter++;
5425                                 }
5426                                 mask <<= 1;
5427                         }
5428                         active_cu_number += counter;
5429                         if (i < 2 && j < 2)
5430                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5431                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5432                 }
5433         }
5434         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5435         mutex_unlock(&adev->grbm_idx_mutex);
5436
5437         cu_info->number = active_cu_number;
5438         cu_info->ao_cu_mask = ao_cu_mask;
5439         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5440
5441         return 0;
5442 }
5443
5444 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5445 {
5446         .type = AMD_IP_BLOCK_TYPE_GFX,
5447         .major = 9,
5448         .minor = 0,
5449         .rev = 0,
5450         .funcs = &gfx_v9_0_ip_funcs,
5451 };