1 #include <linux/module.h>
2 #include <linux/slab.h>
/* Family-specific decode callbacks; allocated and populated by mce_amd_init(). */
8 static struct amd_decoder_ops *fam_ops;
/* Mask handed to XEC() when extracting the extended error code; mce_amd_init() may widen it. */
10 static u8 xec_mask = 0xf;
/* Set through amd_report_gart_errors(); consulted by amd_filter_mce() for bank 4 errors with xec 0x5. */
12 static bool report_gart_errors;
/* Optional DRAM ECC decoder hook: called from the MC4 and SMCA decode paths, cleared on unregister. */
13 static void (*decode_dram_ecc)(int node_id, struct mce *m);
/* Exported setter: stores @v in report_gart_errors, which amd_filter_mce() reads. */
15 void amd_report_gart_errors(bool v)
17 report_gart_errors = v;
19 EXPORT_SYMBOL_GPL(amd_report_gart_errors);
/*
 * Exported counterpart of amd_unregister_ecc_decoder().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * stores @f in decode_dram_ecc - confirm against the full file.
 */
21 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
25 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
/*
 * Drop the DRAM ECC decoder hook. When a decoder is registered, WARN if it
 * is not @f (the caller is expected to pass the function it registered) and
 * reset decode_dram_ecc to NULL.
 */
27 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
29 if (decode_dram_ecc) {
30 WARN_ON(decode_dram_ecc != f);
32 decode_dram_ecc = NULL;
35 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
38 * string representation for the different MCA reported error types, see F3x48
42 /* transaction type */
43 static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
/* cache level */
46 static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
48 /* memory transaction type */
49 static const char * const rrrr_msgs[] = {
50 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
53 /* participating processor */
/* Not static: this table is exported right below for users outside this file. */
54 const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
55 EXPORT_SYMBOL_GPL(pp_msgs);
/* request timeout state */
58 static const char * const to_msgs[] = { "no timeout", "timed out" };
/* memory or I/O transaction */
61 static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
63 /* internal error type */
64 static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
/*
 * MC1 error descriptions consumed by f15h_mc1_mce(). The table is not indexed
 * by xec directly: that function uses xec, xec-2 or xec-4 depending on the
 * extended error code, so the "xec =" markers below note where the offset
 * changes.
 */
66 static const char * const f15h_mc1_mce_desc[] = {
67 "UC during a demand linefill from L2",
68 "Parity error during data load from IC",
69 "Parity error for IC valid bit",
70 "Main tag parity error",
71 "Parity error in prediction queue",
72 "PFB data/address parity error",
73 "Parity error in the branch status reg",
74 "PFB promotion address error",
75 "Tag error during probe/victimization",
76 "Parity error for IC probe tag valid bit",
77 "PFB non-cacheable bit parity error",
78 "PFB valid bit parity error", /* xec = 0xd */
79 "Microcode Patch Buffer", /* xec = 0x10 */
/*
 * MC2 error descriptions consumed by f15h_mc2_mce(), which indexes the table
 * with xec - 0x4 or xec - 0x7; the "xec =" markers show where each range starts.
 */
87 static const char * const f15h_mc2_mce_desc[] = {
88 "Fill ECC error on data fills", /* xec = 0x4 */
89 "Fill parity error on insn fills",
90 "Prefetcher request FIFO parity error",
91 "PRQ address parity error",
92 "PRQ data parity error",
95 "WCB Data parity error",
96 "VB Data ECC or parity error",
97 "L2 Tag ECC error", /* xec = 0x10 */
98 "Hard L2 Tag ECC error",
99 "Multiple hits on L2 tag",
101 "PRB address parity error"
/*
 * MC4 error descriptions. decode_mc4_mce() prints mc4_mce_desc[xec] for the
 * DRAM ECC codes and mc4_mce_desc[xec - offset] for the remaining ones.
 */
104 static const char * const mc4_mce_desc[] = {
105 "DRAM ECC error detected on the NB",
106 "CRC error detected on HT link",
107 "Link-defined sync error packets detected on HT link",
110 "Invalid GART PTE entry during GART table walk",
111 "Unsupported atomic RMW received from an IO link",
112 "Watchdog timeout due to lack of progress",
113 "DRAM ECC error detected on the NB",
114 "SVM DMA Exclusion Vector error",
115 "HT data error detected on link",
116 "Protocol error (link, L3, probe filter)",
117 "NB internal arrays parity error",
118 "DRAM addr/ctl signals parity error",
119 "IO link transmission error",
120 "L3 data cache ECC error", /* xec = 0x1c */
121 "L3 cache tag error",
122 "L3 LRU parity bits error",
123 "ECC Error in the Probe Filter directory"
/*
 * MC5 error descriptions, indexed by xec in decode_mc5_mce(). That function
 * appends " parity error" to the entry except for xec 0x0 and 0xc.
 */
126 static const char * const mc5_mce_desc[] = {
127 "CPU Watchdog timer expire",
128 "Wakeup array dest tag",
132 "Retire dispatch queue",
133 "Mapper checkpoint array",
134 "Physical register file EX0 port",
135 "Physical register file EX1 port",
136 "Physical register file AG0 port",
137 "Physical register file AG1 port",
138 "Flag register file",
140 "Retire status queue"
/* MC6 error descriptions, indexed by xec in decode_mc6_mce() and printed as "<entry> parity error". */
143 static const char * const mc6_mce_desc[] = {
144 "Hardware Assertion",
146 "Physical Register File",
149 "Status Register File",
152 /* Scalable MCA error strings */
/* SMCA_LS banks: one description per extended error code (hooked up in smca_mce_descs[]). */
153 static const char * const smca_ls_mce_desc[] = {
154 "Load queue parity error",
155 "Store queue parity error",
156 "Miss address buffer payload parity error",
157 "Level 1 TLB parity error",
158 "DC Tag error type 5",
159 "DC Tag error type 6",
160 "DC Tag error type 1",
161 "Internal error type 1",
162 "Internal error type 2",
163 "System Read Data Error Thread 0",
164 "System Read Data Error Thread 1",
165 "DC Tag error type 2",
166 "DC Data error type 1 and poison consumption",
167 "DC Data error type 2",
168 "DC Data error type 3",
169 "DC Tag error type 4",
170 "Level 2 TLB parity error",
172 "DC Tag error type 3",
173 "DC Tag error type 5",
174 "L2 Fill Data error",
/* SMCA_IF banks: one description per extended error code (hooked up in smca_mce_descs[]). */
177 static const char * const smca_if_mce_desc[] = {
178 "Op Cache Microtag Probe Port Parity Error",
179 "IC Microtag or Full Tag Multi-hit Error",
180 "IC Full Tag Parity Error",
181 "IC Data Array Parity Error",
182 "Decoupling Queue PhysAddr Parity Error",
183 "L0 ITLB Parity Error",
184 "L1 ITLB Parity Error",
185 "L2 ITLB Parity Error",
186 "BPQ Thread 0 Snoop Parity Error",
187 "BPQ Thread 1 Snoop Parity Error",
188 "L1 BTB Multi-Match Error",
189 "L2 BTB Multi-Match Error",
190 "L2 Cache Response Poison Error",
191 "System Read Data Error",
/* SMCA_L2_CACHE banks: one description per extended error code (hooked up in smca_mce_descs[]). */
194 static const char * const smca_l2_mce_desc[] = {
195 "L2M Tag Multiple-Way-Hit error",
196 "L2M Tag or State Array ECC Error",
197 "L2M Data Array ECC Error",
198 "Hardware Assert Error",
/* SMCA_DE banks: one description per extended error code (hooked up in smca_mce_descs[]). */
201 static const char * const smca_de_mce_desc[] = {
202 "Micro-op cache tag parity error",
203 "Micro-op cache data parity error",
204 "Instruction buffer parity error",
205 "Micro-op queue parity error",
206 "Instruction dispatch queue parity error",
207 "Fetch address FIFO parity error",
208 "Patch RAM data parity error",
209 "Patch RAM sequencer parity error",
210 "Micro-op buffer parity error"
/* SMCA_EX banks: one description per extended error code (hooked up in smca_mce_descs[]). */
213 static const char * const smca_ex_mce_desc[] = {
214 "Watchdog Timeout error",
215 "Physical register file parity error",
216 "Flag register file parity error",
217 "Immediate displacement register file parity error",
218 "Address generator payload parity error",
219 "EX payload parity error",
220 "Checkpoint queue parity error",
221 "Retire dispatch queue parity error",
222 "Retire status queue parity error",
223 "Scheduling queue parity error",
224 "Branch buffer queue parity error",
225 "Hardware Assertion error",
/* SMCA_FP banks: one description per extended error code (hooked up in smca_mce_descs[]). */
228 static const char * const smca_fp_mce_desc[] = {
229 "Physical register file (PRF) parity error",
230 "Freelist (FL) parity error",
231 "Schedule queue parity error",
233 "Retire queue (RQ) parity error",
234 "Status register file (SRF) parity error",
235 "Hardware assertion",
/* SMCA_L3_CACHE banks: one description per extended error code (hooked up in smca_mce_descs[]). */
238 static const char * const smca_l3_mce_desc[] = {
239 "Shadow Tag Macro ECC Error",
240 "Shadow Tag Macro Multi-way-hit Error",
242 "L3M Tag Multi-way-hit Error",
243 "L3M Data ECC Error",
244 "SDP Parity Error or SystemReadDataError from XI",
245 "L3 Victim Queue Parity Error",
246 "L3 Hardware Assertion",
/* SMCA_CS banks: one description per extended error code (hooked up in smca_mce_descs[]). */
249 static const char * const smca_cs_mce_desc[] = {
252 "Security Violation",
254 "Unexpected Response",
255 "Request or Probe Parity Error",
256 "Read Response Parity Error",
257 "Atomic Request Parity Error",
258 "Probe Filter ECC Error",
/* SMCA_CS_V2 banks: one description per extended error code (hooked up in smca_mce_descs[]). */
261 static const char * const smca_cs2_mce_desc[] = {
264 "Security Violation",
266 "Unexpected Response",
267 "Request or Probe Parity Error",
268 "Read Response Parity Error",
269 "Atomic Request Parity Error",
270 "SDP read response had no match in the CS queue",
271 "Probe Filter Protocol Error",
272 "Probe Filter ECC Error",
273 "SDP read response had an unexpected RETRY error",
274 "Counter overflow error",
275 "Counter underflow error",
/* SMCA_PIE banks: one description per extended error code (hooked up in smca_mce_descs[]). */
278 static const char * const smca_pie_mce_desc[] = {
280 "Register security violation",
282 "Poison data consumption",
283 "A deferred error was detected in the DF"
/*
 * SMCA_UMC banks: one description per extended error code (hooked up in
 * smca_mce_descs[]). decode_smca_error() additionally forwards xec 0 on
 * this bank type to decode_dram_ecc() when a decoder is registered.
 */
286 static const char * const smca_umc_mce_desc[] = {
290 "Advanced peripheral bus error",
291 "Address/Command parity error",
292 "Write data CRC error",
293 "DCQ SRAM ECC error",
294 "AES SRAM ECC error",
/* SMCA_PB banks: one description per extended error code (hooked up in smca_mce_descs[]). */
297 static const char * const smca_pb_mce_desc[] = {
298 "An ECC error in the Parameter Block RAM array",
/* SMCA_PSP banks: one description per extended error code (hooked up in smca_mce_descs[]). */
301 static const char * const smca_psp_mce_desc[] = {
302 "An ECC or parity error in a PSP RAM instance",
/* SMCA_PSP_V2 banks: one description per extended error code (hooked up in smca_mce_descs[]). */
305 static const char * const smca_psp2_mce_desc[] = {
306 "High SRAM ECC or parity error",
307 "Low SRAM ECC or parity error",
308 "Instruction Cache Bank 0 ECC or parity error",
309 "Instruction Cache Bank 1 ECC or parity error",
310 "Instruction Tag Ram 0 parity error",
311 "Instruction Tag Ram 1 parity error",
312 "Data Cache Bank 0 ECC or parity error",
313 "Data Cache Bank 1 ECC or parity error",
314 "Data Cache Bank 2 ECC or parity error",
315 "Data Cache Bank 3 ECC or parity error",
316 "Data Tag Bank 0 parity error",
317 "Data Tag Bank 1 parity error",
318 "Data Tag Bank 2 parity error",
319 "Data Tag Bank 3 parity error",
320 "Dirty Data Ram parity error",
321 "TLB Bank 0 parity error",
322 "TLB Bank 1 parity error",
323 "System Hub Read Buffer ECC or parity error",
/* SMCA_SMU banks: one description per extended error code (hooked up in smca_mce_descs[]). */
326 static const char * const smca_smu_mce_desc[] = {
327 "An ECC or parity error in an SMU RAM instance",
/* SMCA_SMU_V2 banks: one description per extended error code (hooked up in smca_mce_descs[]). */
330 static const char * const smca_smu2_mce_desc[] = {
331 "High SRAM ECC or parity error",
332 "Low SRAM ECC or parity error",
333 "Data Cache Bank A ECC or parity error",
334 "Data Cache Bank B ECC or parity error",
335 "Data Tag Cache Bank A ECC or parity error",
336 "Data Tag Cache Bank B ECC or parity error",
337 "Instruction Cache Bank A ECC or parity error",
338 "Instruction Cache Bank B ECC or parity error",
339 "Instruction Tag Cache Bank A ECC or parity error",
340 "Instruction Tag Cache Bank B ECC or parity error",
341 "System Hub Read Buffer ECC or parity error",
/* SMCA_MP5 banks: one description per extended error code (hooked up in smca_mce_descs[]). */
344 static const char * const smca_mp5_mce_desc[] = {
345 "High SRAM ECC or parity error",
346 "Low SRAM ECC or parity error",
347 "Data Cache Bank A ECC or parity error",
348 "Data Cache Bank B ECC or parity error",
349 "Data Tag Cache Bank A ECC or parity error",
350 "Data Tag Cache Bank B ECC or parity error",
351 "Instruction Cache Bank A ECC or parity error",
352 "Instruction Cache Bank B ECC or parity error",
353 "Instruction Tag Cache Bank A ECC or parity error",
354 "Instruction Tag Cache Bank B ECC or parity error",
/* SMCA_NBIO banks: one description per extended error code (hooked up in smca_mce_descs[]). */
357 static const char * const smca_nbio_mce_desc[] = {
358 "ECC or Parity error",
360 "SDP ErrEvent error",
361 "SDP Egress Poison Error",
362 "IOHC Internal Poison Error",
/* SMCA_PCIE banks: one description per extended error code (hooked up in smca_mce_descs[]). */
365 static const char * const smca_pcie_mce_desc[] = {
366 "CCIX PER Message logging",
367 "CCIX Read Response with Status: Non-Data Error",
368 "CCIX Write Response with Status: Non-Data Error",
369 "CCIX Read Response with Status: Data Error",
370 "CCIX Non-okay write response with data error",
/* One SMCA description table together with its length, so lookups can be bounds-checked. */
373 struct smca_mce_desc {
/* array of description strings, indexed by extended error code */
374 const char * const *descs;
/* number of entries in @descs */
375 unsigned int num_descs;
/*
 * Description tables per SMCA bank type, indexed by the SMCA_* bank type
 * value. decode_smca_error() checks xec against num_descs before it
 * dereferences descs[xec].
 */
378 static struct smca_mce_desc smca_mce_descs[] = {
379 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
380 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
381 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
382 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
383 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
384 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
385 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
386 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
387 [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
388 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
389 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
390 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
391 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
392 [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
393 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
394 [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
395 [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
396 [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
397 [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
/*
 * MC0 decode callback (installed as fam_ops->mc0_mce by mce_amd_init() and
 * used as the tail of f10h_mc0_mce()). Continues the current log line with
 * either the L1-linefill message or, for LL_L1, the Data/Tag error message.
 */
400 static bool f12h_mc0_mce(u16 ec, u8 xec)
409 pr_cont("during L1 linefill from L2.\n");
410 else if (ll == LL_L1)
411 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
/*
 * MC0 decode callback: reports "during data scrub." for an R4_GEN
 * transaction at LL_L1; the final statement hands the signature on to
 * f12h_mc0_mce().
 */
418 static bool f10h_mc0_mce(u16 ec, u8 xec)
420 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
421 pr_cont("during data scrub.\n");
424 return f12h_mc0_mce(ec, xec);
/*
 * MC0 decode callback: emits "during system linefill." for one case (its
 * condition is outside this excerpt) and chains to f10h_mc0_mce().
 */
427 static bool k8_mc0_mce(u16 ec, u8 xec)
430 pr_cont("during system linefill.\n");
434 return f10h_mc0_mce(ec, xec);
/*
 * MC0 decode callback. The visible code tests for TT_DATA at LL_L1 before
 * choosing one of the parity messages from r4, and for II_MEM/II_IO at
 * LL_LG in the bus-error branch ("System read data error on a ...").
 */
437 static bool cat_mc0_mce(u16 ec, u8 xec)
444 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
450 pr_cont("Data/Tag parity error due to %s.\n",
451 (r4 == R4_DRD ? "load/hw prf" : "store"));
454 pr_cont("Copyback parity error on a tag miss.\n");
457 pr_cont("Tag parity error during snoop.\n");
462 } else if (BUS_ERROR(ec)) {
464 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
467 pr_cont("System read data error on a ");
471 pr_cont("TLB reload.\n");
/*
 * MC0 decode callback. Names the structure that saw the error (data array,
 * STQ, SCB, tag, LDQ, ...) and has separate branches for bus errors and
 * internal errors (INT_ERROR -> "Hardware Assert.").
 */
489 static bool f15h_mc0_mce(u16 ec, u8 xec)
497 pr_cont("Data Array access error.\n");
501 pr_cont("UC error during a linefill from L2/NB.\n");
506 pr_cont("STQ access error.\n");
510 pr_cont("SCB access error.\n");
514 pr_cont("Tag error.\n");
518 pr_cont("LDQ access error.\n");
524 } else if (BUS_ERROR(ec)) {
527 pr_cont("System Read Data Error.\n");
529 pr_cont(" Internal error condition type %d.\n", xec);
530 } else if (INT_ERROR(ec)) {
532 pr_cont("Hardware Assert.\n");
/*
 * Decode an MC0 error from @m->status. Prints the "MC0 Error: " prefix,
 * decodes TLB signatures itself and passes the remaining ones to the
 * family callback fam_ops->mc0_mce(); "Corrupted MC0 MCE info?" is the
 * fallback message.
 */
542 static void decode_mc0_mce(struct mce *m)
544 u16 ec = EC(m->status);
545 u8 xec = XEC(m->status, xec_mask);
547 pr_emerg(HW_ERR "MC0 Error: ");
549 /* TLB error signatures are the same across families */
551 if (TT(ec) == TT_DATA) {
552 pr_cont("%s TLB %s.\n", LL_MSG(ec),
553 ((xec == 2) ? "locked miss"
554 : (xec ? "multimatch" : "parity")));
557 } else if (fam_ops->mc0_mce(ec, xec))
560 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
/*
 * MC1 decode callback: finishes the log line with the linefill, data-load
 * parity, copyback/victim or tag-snoop message.
 */
563 static bool k8_mc1_mce(u16 ec, u8 xec)
572 pr_cont("during a linefill from L2.\n");
573 else if (ll == 0x1) {
576 pr_cont("Parity error during data load.\n");
580 pr_cont("Copyback Parity/Victim error.\n");
584 pr_cont("Tag Snoop error.\n");
/*
 * MC1 decode callback. Tests that the transaction type is TT_INSTR, then
 * selects one of the parity/tag messages below (R4_SNOOP picks the
 * snoop/victimization one; the other conditions are outside this excerpt).
 */
597 static bool cat_mc1_mce(u16 ec, u8 xec)
605 if (TT(ec) != TT_INSTR)
609 pr_cont("Data/tag array parity error for a tag hit.\n");
610 else if (r4 == R4_SNOOP)
611 pr_cont("Tag error during snoop/victimization.\n");
613 pr_cont("Tag parity error from victim castout.\n");
615 pr_cont("Microcode patch RAM parity error.\n");
/*
 * MC1 decode callback: maps xec onto f15h_mc1_mce_desc[] using an offset of
 * 0, 2 or 4 (the table has no entries for the unused codes in between).
 * The last group is wrapped as "Decoder <entry> parity error."
 */
622 static bool f15h_mc1_mce(u16 ec, u8 xec)
631 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
635 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
639 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
643 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
/*
 * Decode an MC1 error from @m->status. TLB, bus and internal errors are
 * decoded here; everything else goes to the family callback
 * fam_ops->mc1_mce(). "Corrupted MC1 MCE info?" is the fallback message.
 */
652 static void decode_mc1_mce(struct mce *m)
654 u16 ec = EC(m->status);
655 u8 xec = XEC(m->status, xec_mask);
657 pr_emerg(HW_ERR "MC1 Error: ");
660 pr_cont("%s TLB %s.\n", LL_MSG(ec),
661 (xec ? "multimatch" : "parity error"));
662 else if (BUS_ERROR(ec)) {
/* true only on family 0xf with status bit 58 set; selects the wording below */
663 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
665 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
666 } else if (INT_ERROR(ec)) {
668 pr_cont("Hardware Assert.\n");
671 } else if (fam_ops->mc1_mce(ec, xec))
679 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
/*
 * MC2 decode callback. Visible cases: write/victim data buffer errors,
 * L2 cache tag errors (xec 0x2 with a memory error code), and for xec 0x0
 * the Page Descriptor Cache/Guest TLB, NB data read and L2 data
 * copyback/access messages.
 */
682 static bool k8_mc2_mce(u16 ec, u8 xec)
687 pr_cont(" in the write data buffers.\n");
689 pr_cont(" in the victim data buffers.\n");
690 else if (xec == 0x2 && MEM_ERROR(ec))
691 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
692 else if (xec == 0x0) {
694 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
696 else if (BUS_ERROR(ec))
697 pr_cont(": %s/ECC error in data read from NB: %s.\n",
698 R4_MSG(ec), PP_MSG(ec));
699 else if (MEM_ERROR(ec)) {
703 pr_cont(": %s error during data copyback.\n",
706 pr_cont(": %s parity/ECC error during data "
707 "access from L2.\n", R4_MSG(ec));
/*
 * MC2 decode callback with branches for TLB, bus, memory and internal
 * errors. Memory errors are looked up in f15h_mc2_mce_desc[] with an
 * offset of 0x4 or 0x7, matching the "xec =" markers in that table.
 */
718 static bool f15h_mc2_mce(u16 ec, u8 xec)
724 pr_cont("Data parity TLB read error.\n");
726 pr_cont("Poison data provided for TLB fill.\n");
729 } else if (BUS_ERROR(ec)) {
733 pr_cont("Error during attempted NB data read.\n");
734 } else if (MEM_ERROR(ec)) {
737 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
741 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
747 } else if (INT_ERROR(ec)) {
749 pr_cont("Hardware Assert.\n");
/*
 * MC2 decode callback: reports buffer parity, L2 tag ECC, L2 data array
 * ECC and L2 attribute parity errors, each annotated with a short source
 * name derived from the memory transaction type r4.
 */
757 static bool f16h_mc2_mce(u16 ec, u8 xec)
766 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
771 pr_cont("ECC error in L2 tag (%s).\n",
772 ((r4 == R4_GEN) ? "BankReq" :
773 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
778 pr_cont("ECC error in L2 data array (%s).\n",
779 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
780 ((r4 == R4_GEN) ? "Attr" :
781 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
786 pr_cont("Parity error in L2 attribute bits (%s).\n",
787 ((r4 == R4_RD) ? "Hit" :
788 ((r4 == R4_GEN) ? "Attr" : "Fill")));
/*
 * Decode an MC2 error: print the "MC2 Error: " prefix and let the family
 * callback fam_ops->mc2_mce() finish the line.
 *
 * NOTE(review): the fallback below uses pr_cont() with an HW_ERR prefix,
 * whereas the MC0, MC1 and MC3-MC6 decoders in this file print their
 * "Corrupted ... info?" message with pr_emerg(). Looks inconsistent -
 * confirm whether it is intended.
 */
798 static void decode_mc2_mce(struct mce *m)
800 u16 ec = EC(m->status);
801 u8 xec = XEC(m->status, xec_mask);
803 pr_emerg(HW_ERR "MC2 Error: ");
805 if (!fam_ops->mc2_mce(ec, xec))
806 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
/*
 * Decode an MC3 error. For boot CPU families 0x14 and later only the
 * "please report" message is printed. Otherwise the visible check looks
 * for a bus error whose transaction type is R4_DRD or R4_DWR and prints it;
 * "Corrupted MC3 MCE info?" is the fallback message.
 */
809 static void decode_mc3_mce(struct mce *m)
811 u16 ec = EC(m->status);
812 u8 xec = XEC(m->status, xec_mask);
814 if (boot_cpu_data.x86 >= 0x14) {
815 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
816 " please report on LKML.\n");
820 pr_emerg(HW_ERR "MC3 Error");
825 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
828 pr_cont(" during %s.\n", R4_MSG(ec));
835 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
/*
 * Decode an MC4 error for the node returned by amd_get_nb_id(m->extcpu).
 * Unlike the other decoders, xec is extracted with a fixed 0x1f mask rather
 * than xec_mask. DRAM ECC codes (xec 0x0 and 0x8) are printed from
 * mc4_mce_desc[] and handed to decode_dram_ecc(); the remaining codes are
 * printed from mc4_mce_desc[xec - offset].
 * NOTE(review): the guard against a NULL decode_dram_ecc is not visible in
 * this excerpt - confirm it exists in the full function.
 */
838 static void decode_mc4_mce(struct mce *m)
840 unsigned int fam = x86_family(m->cpuid);
841 int node_id = amd_get_nb_id(m->extcpu);
842 u16 ec = EC(m->status);
843 u8 xec = XEC(m->status, 0x1f);
846 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
851 /* special handling for DRAM ECCs */
852 if (xec == 0x0 || xec == 0x8) {
853 /* no ECCs on F11h */
857 pr_cont("%s.\n", mc4_mce_desc[xec]);
860 decode_dram_ecc(node_id, m);
867 pr_cont("GART Table Walk data error.\n");
868 else if (BUS_ERROR(ec))
869 pr_cont("DMA Exclusion Vector Table Walk error.\n");
875 if (fam == 0x15 || fam == 0x16)
876 pr_cont("Compute Unit Data Error.\n");
889 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
893 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
/*
 * Decode an MC5 error. Families 0xf and 0x11 are singled out first (the
 * action taken is outside this excerpt). Descriptions come from
 * mc5_mce_desc[xec]: xec 0x0 and 0xc are printed as-is, the other visible
 * path appends " parity error". "Corrupted MC5 MCE info?" is the fallback.
 */
896 static void decode_mc5_mce(struct mce *m)
898 unsigned int fam = x86_family(m->cpuid);
899 u16 ec = EC(m->status);
900 u8 xec = XEC(m->status, xec_mask);
902 if (fam == 0xf || fam == 0x11)
905 pr_emerg(HW_ERR "MC5 Error: ");
909 pr_cont("Hardware Assert.\n");
915 if (xec == 0x0 || xec == 0xc)
916 pr_cont("%s.\n", mc5_mce_desc[xec]);
918 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
925 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
/*
 * Decode an MC6 error: prints mc6_mce_desc[xec] followed by " parity error";
 * "Corrupted MC6 MCE info?" is the fallback message.
 */
928 static void decode_mc6_mce(struct mce *m)
930 u8 xec = XEC(m->status, xec_mask);
932 pr_emerg(HW_ERR "MC6 Error: ");
937 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
941 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
944 /* Decode errors according to Scalable MCA specification */
/*
 * Steps visible here: check m->bank against the size of smca_banks[], take
 * the bank's hwid and bank type, report reserved banks, then print the IP
 * name and the extended error code. The textual description is only added
 * when xec is within the bank type's table AND its bit is set in
 * hwid->xec_bitmap. For SMCA_UMC with xec 0 the registered DRAM ECC
 * decoder, if any, is called as well.
 */
945 static void decode_smca_error(struct mce *m)
947 struct smca_hwid *hwid;
948 enum smca_bank_types bank_type;
950 u8 xec = XEC(m->status, xec_mask);
952 if (m->bank >= ARRAY_SIZE(smca_banks))
955 hwid = smca_banks[m->bank].hwid;
959 bank_type = hwid->bank_type;
961 if (bank_type == SMCA_RESERVED) {
962 pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
966 ip_name = smca_get_long_name(bank_type);
968 pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
970 /* Only print the decode of valid error codes */
971 if (xec < smca_mce_descs[bank_type].num_descs &&
972 (hwid->xec_bitmap & BIT_ULL(xec))) {
973 pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
976 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
977 decode_dram_ecc(cpu_to_node(m->extcpu), m);
/*
 * Print the generic fields of the 16-bit error code @ec: internal error
 * type, cache level, mem/io, transaction type, memory transaction type
 * (memory and bus errors) and participating processor plus timeout state.
 * Which fields are printed depends on the error class; most of those
 * conditions are outside this excerpt.
 */
980 static inline void amd_decode_err_code(u16 ec)
983 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
987 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
990 pr_cont(", mem/io: %s", II_MSG(ec));
992 pr_cont(", tx: %s", TT_MSG(ec));
994 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
995 pr_cont(", mem-tx: %s", R4_MSG(ec));
998 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
1005 * Filter out unwanted MCE signatures here.
/*
 * Called first by amd_decode_mce(). The visible rule matches bank 4 errors
 * whose extended error code (5-bit mask) is 0x5 while report_gart_errors is
 * false; presumably a true return makes the caller skip decoding - the
 * return statements are outside this excerpt.
 */
1007 static bool amd_filter_mce(struct mce *m)
1010 * NB GART TLB error reporting is disabled by default.
1012 if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5 && !report_gart_errors)
/*
 * Return a one-line severity string for @m. Uncorrected errors (UC) are
 * fatal when PCC is set, restartable when MCG_STATUS_RIPV is set and
 * containable otherwise. Without UC the error is reported as deferred or
 * corrected.
 */
1018 static const char *decode_error_status(struct mce *m)
1020 if (m->status & MCI_STATUS_UC) {
1021 if (m->status & MCI_STATUS_PCC)
1022 return "System Fatal error.";
1023 if (m->mcgstatus & MCG_STATUS_RIPV)
1024 return "Uncorrected, software restartable error.";
1025 return "Uncorrected, software containable error.";
1028 if (m->status & MCI_STATUS_DEFERRED)
1029 return "Deferred error, no action required.";
1031 return "Corrected error, no action required.";
/*
 * Notifier callback (see amd_mce_dec_nb). @data points to the struct mce to
 * decode; @nb and @val are not used in the visible code. After the
 * amd_filter_mce() check it prints the severity, the MCx_STATUS flag line,
 * the error address when ADDRV is set, the SMCA IPID/syndrome and SMCA
 * decode on SMCA systems, the TSC and finally the generic error code
 * fields from the low 16 status bits.
 */
1035 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
1037 struct mce *m = (struct mce *)data;
1038 unsigned int fam = x86_family(m->cpuid);
1041 if (amd_filter_mce(m))
1044 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
1046 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
1048 fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
1050 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
1051 ((m->status & MCI_STATUS_UC) ? "UE" :
1052 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
1053 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
1054 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
1055 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
1057 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1059 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
/* TCC is only printed when the bank's config MSR is readable and has MCI_CONFIG_MCAX set */
1061 if (!rdmsr_safe(addr, &low, &high) &&
1062 (low & MCI_CONFIG_MCAX))
1063 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1065 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
1068 /* do the two ECC bits together: status[46:45], i.e. bits [14:13] of the upper 32-bit half */
1069 ecc = (m->status >> 45) & 0x3;
1071 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1074 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
1076 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
1077 if (fam != 0x15 || m->bank != 4)
1078 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
1082 pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
1084 pr_cont("]: 0x%016llx\n", m->status);
1086 if (m->status & MCI_STATUS_ADDRV)
1087 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
1089 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1090 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
1092 if (m->status & MCI_STATUS_SYNDV)
1093 pr_cont(", Syndrome: 0x%016llx", m->synd);
1097 decode_smca_error(m);
1102 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
1141 amd_decode_err_code(m->status & 0xffff);
/*
 * Notifier block that runs amd_decode_mce() at MCE_PRIO_EDAC priority;
 * registered on the MCE decode chain by mce_amd_init() and removed by
 * mce_amd_exit().
 */
1146 static struct notifier_block amd_mce_dec_nb = {
1147 .notifier_call = amd_decode_mce,
1148 .priority = MCE_PRIO_EDAC,
/*
 * Set up the decoder: check that the boot CPU vendor is AMD or Hygon,
 * allocate fam_ops, install the MC0/MC1/MC2 callbacks that match the CPU
 * family (the case labels are outside this excerpt), adjust xec_mask where
 * needed and register amd_mce_dec_nb on the MCE decode chain.
 *
 * NOTE(review): the NULL check on the kzalloc() result and the error-path
 * cleanup are not visible in this excerpt - confirm they exist.
 * NOTE(review): this is hooked up with early_initcall() while the teardown
 * uses module_exit(); confirm that pairing is intentional.
 */
1151 static int __init mce_amd_init(void)
1153 struct cpuinfo_x86 *c = &boot_cpu_data;
1155 if (c->x86_vendor != X86_VENDOR_AMD &&
1156 c->x86_vendor != X86_VENDOR_HYGON)
1159 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1165 fam_ops->mc0_mce = k8_mc0_mce;
1166 fam_ops->mc1_mce = k8_mc1_mce;
1167 fam_ops->mc2_mce = k8_mc2_mce;
1171 fam_ops->mc0_mce = f10h_mc0_mce;
1172 fam_ops->mc1_mce = k8_mc1_mce;
1173 fam_ops->mc2_mce = k8_mc2_mce;
1177 fam_ops->mc0_mce = k8_mc0_mce;
1178 fam_ops->mc1_mce = k8_mc1_mce;
1179 fam_ops->mc2_mce = k8_mc2_mce;
1183 fam_ops->mc0_mce = f12h_mc0_mce;
1184 fam_ops->mc1_mce = k8_mc1_mce;
1185 fam_ops->mc2_mce = k8_mc2_mce;
1189 fam_ops->mc0_mce = cat_mc0_mce;
1190 fam_ops->mc1_mce = cat_mc1_mce;
1191 fam_ops->mc2_mce = k8_mc2_mce;
/* model 0x60 gets a 6-bit extended error code mask, the other models here a 5-bit one */
1195 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1197 fam_ops->mc0_mce = f15h_mc0_mce;
1198 fam_ops->mc1_mce = f15h_mc1_mce;
1199 fam_ops->mc2_mce = f15h_mc2_mce;
1204 fam_ops->mc0_mce = cat_mc0_mce;
1205 fam_ops->mc1_mce = cat_mc1_mce;
1206 fam_ops->mc2_mce = f16h_mc2_mce;
1212 if (!boot_cpu_has(X86_FEATURE_SMCA)) {
1213 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1219 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
1223 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1225 mce_register_decode_chain(&amd_mce_dec_nb);
1234 early_initcall(mce_amd_init);
/* Module teardown: take amd_mce_dec_nb off the MCE decode chain again. */
1237 static void __exit mce_amd_exit(void)
1239 mce_unregister_decode_chain(&amd_mce_dec_nb);
/* Module metadata; "edac-mce-amd" is registered as an alias for this module. */
1243 MODULE_DESCRIPTION("AMD MCE decoder");
1244 MODULE_ALIAS("edac-mce-amd");
1245 MODULE_LICENSE("GPL");
1246 module_exit(mce_amd_exit);