// SPDX-License-Identifier: GPL-2.0-only
/* Intel i7 core/Nehalem Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
 * Xeon 55xx and Xeon 56xx, also known as Nehalem, Nehalem-EP, Lynnfield
 * and Westmere-EP.
 *
 * Copyright (c) 2009-2010 by:
 *       Mauro Carvalho Chehab
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *      http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/processor.h>
#include <asm/div64.h>

#include "edac_module.h"

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);
static int probed;

static int use_pci_fixup;
module_param(use_pci_fixup, int, 0444);
MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * We currently support systems with up to 2 sockets; each additional socket
 * adds another QPI (QuickPath Interconnect) bus, so to support more sockets,
 * just increment this number.
 */
#define MAX_SOCKET_BUSES        2
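
/*
 * For example, with MAX_SOCKET_BUSES == 2 the fixup probes buses 255 and
 * 254 (see i7core_xeon_pci_fixup() below).
 */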


/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0"
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)                       \
        edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)               \
        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

        /* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL  0x90
  #define MC_CFG_UNLOCK         0x02
  #define MC_CFG_LOCK           0x00

        /* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL      0x48
#define MC_STATUS       0x4c
#define MC_MAX_DOD      0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1        0x60
  #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)

#define MC_TEST_ERR_RCV0        0x64
  #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_SSRCONTROL           0x48
  #define SSR_MODE_DISABLE      0x00
  #define SSR_MODE_ENABLE       0x01
  #define SSR_MODE_MASK         0x03

#define MC_SCRUB_CONTROL        0x4c
  #define STARTSCRUB            (1 << 24)
  #define SCRUBINTERVAL_MASK    0xffffff

#define MC_COR_ECC_CNT_0        0x80
#define MC_COR_ECC_CNT_1        0x84
#define MC_COR_ECC_CNT_2        0x88
#define MC_COR_ECC_CNT_3        0x8c
#define MC_COR_ECC_CNT_4        0x90
#define MC_COR_ECC_CNT_5        0x94

#define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
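
/*
 * Each MC_COR_ECC_CNT_* dword packs two 15-bit correctable-error counters:
 * DIMM_BOT_COR_ERR() extracts bits 14:0 and DIMM_TOP_COR_ERR() extracts
 * bits 30:16 of the same register.
 */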


        /* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT           (1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
  #define QUAD_RANK_PRESENT             (1 << 22)
  #define REGISTERED_DIMM               (1 << 15)

#define MC_CHANNEL_MAPPER       0x60
  #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK             0xffff

#define MC_CHANNEL_ADDR_MATCH   0xf0
#define MC_CHANNEL_ERROR_MASK   0xf8
#define MC_CHANNEL_ERROR_INJECT 0xfc
  #define INJECT_ADDR_PARITY    0x10
  #define INJECT_ECC            0x08
  #define MASK_CACHELINE        0x06
  #define MASK_FULL_CACHELINE   0x06
  #define MASK_MSB32_CACHELINE  0x04
  #define MASK_LSB32_CACHELINE  0x02
  #define NO_MASK_CACHELINE     0x00
  #define REPEAT_EN             0x01

        /* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0         0x48
#define MC_DOD_CH_DIMM1         0x4c
#define MC_DOD_CH_DIMM2         0x50
  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK     (1 << 9)
  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK            3
  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT         0x7c

#define MC_SAG_CH_0     0x80
#define MC_SAG_CH_1     0x84
#define MC_SAG_CH_2     0x88
#define MC_SAG_CH_3     0x8c
#define MC_SAG_CH_4     0x90
#define MC_SAG_CH_5     0x94
#define MC_SAG_CH_6     0x98
#define MC_SAG_CH_7     0x9c

#define MC_RIR_LIMIT_CH_0       0x40
#define MC_RIR_LIMIT_CH_1       0x44
#define MC_RIR_LIMIT_CH_2       0x48
#define MC_RIR_LIMIT_CH_3       0x4C
#define MC_RIR_LIMIT_CH_4       0x50
#define MC_RIR_LIMIT_CH_5       0x54
#define MC_RIR_LIMIT_CH_6       0x58
#define MC_RIR_LIMIT_CH_7       0x5C
#define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)

#define MC_RIR_WAY_CH           0x80
  #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK          0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3             /* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
        u32     mc_control;
        u32     mc_status;
        u32     max_dod;
        u32     ch_map;
};


struct i7core_inject {
        int     enable;

        u32     section;
        u32     type;
        u32     eccmask;

        /* Error address mask */
        int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
        bool            is_3dimms_present;
        bool            is_single_4rank;
        bool            has_4rank;
        u32             dimms;
};

struct pci_id_descr {
        int                     dev;
        int                     func;
        int                     dev_id;
        int                     optional;
};

struct pci_id_table {
        const struct pci_id_descr       *descr;
        int                             n_devs;
};

struct i7core_dev {
        struct list_head        list;
        u8                      socket;
        struct pci_dev          **pdev;
        int                     n_devs;
        struct mem_ctl_info     *mci;
};

struct i7core_pvt {
        struct device *addrmatch_dev, *chancounts_dev;

        struct pci_dev  *pci_noncore;
        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

        struct i7core_dev *i7core_dev;

        struct i7core_info      info;
        struct i7core_inject    inject;
        struct i7core_channel   channel[NUM_CHANS];

        int             ce_count_available;

                        /* ECC corrected errors counts per udimm */
        unsigned long   udimm_ce_count[MAX_DIMMS];
        int             udimm_last_ce_count[MAX_DIMMS];
                        /* ECC corrected errors counts per rdimm */
        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

        bool            is_registered, enable_scrub;

        /* DCLK Frequency used for computing scrub rate */
        int                     dclk_freq;

        /* Struct to control EDAC polling */
        struct edac_pci_ctl_info *i7core_pci;
};

#define PCI_DESCR(device, function, device_id)  \
        .dev = (device),                        \
        .func = (function),                     \
        .dev_id = (device_id)

static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

                /* Generic Non-core registers */
        /*
         * This is the PCI device on i7core and on Xeon 35xx (8086:2c41).
         * On Xeon 55xx, however, it has a different id (8086:2c40), so
         * the probing code needs to test for the other address in case
         * this one fails.
         */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },

};

static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },

        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },

        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },

        /*
         * This PCI device has an alternate address on some
         * processors, like the Core i7 860
         */
        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
};

static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
                /* Memory controller */
        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
                        /* Exists only for RDIMM */
        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },

                /* Channel 0 */
        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },

                /* Channel 1 */
        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },

                /* Channel 2 */
        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },

                /* Generic Non-core registers */
        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },

};

#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
static const struct pci_id_table pci_dev_table[] = {
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
        {0,}                    /* 0 terminated list. */
};

/*
 *      pci_device_id   table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
        {0,}                    /* 0 terminated list. */
};

/****************************************************************************
                        Ancillary status routines
 ****************************************************************************/

        /* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))

        /* MC_STATUS bits */
#define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))

        /* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
        return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
        static const int ranks[] = { 1, 2, 4, -EINVAL };

        return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
        static const int banks[] = { 4, 8, 16, -EINVAL };

        return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
        static const int rows[] = {
                1 << 12, 1 << 13, 1 << 14, 1 << 15,
                1 << 16, -EINVAL, -EINVAL, -EINVAL,
        };

        return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
        static const int cols[] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };
        return cols[col & 0x3];
}
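
/*
 * MC_MAX_DOD field layout, as decoded by the helpers above and consumed in
 * get_dimm_config(): bits 1:0 = number of DIMMs, 3:2 = ranks, 5:4 = banks,
 * 8:6 = rows, 10:9 = columns. E.g. max_dod = 0x4 decodes as 1 DIMM,
 * 2 ranks, 4 banks.
 */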

static struct i7core_dev *get_i7core_dev(u8 socket)
{
        struct i7core_dev *i7core_dev;

        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
                if (i7core_dev->socket == socket)
                        return i7core_dev;
        }

        return NULL;
}

static struct i7core_dev *alloc_i7core_dev(u8 socket,
                                           const struct pci_id_table *table)
{
        struct i7core_dev *i7core_dev;

        i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
        if (!i7core_dev)
                return NULL;

        i7core_dev->pdev = kcalloc(table->n_devs, sizeof(*i7core_dev->pdev),
                                   GFP_KERNEL);
        if (!i7core_dev->pdev) {
                kfree(i7core_dev);
                return NULL;
        }

        i7core_dev->socket = socket;
        i7core_dev->n_devs = table->n_devs;
        list_add_tail(&i7core_dev->list, &i7core_edac_list);

        return i7core_dev;
}

static void free_i7core_dev(struct i7core_dev *i7core_dev)
{
        list_del(&i7core_dev->list);
        kfree(i7core_dev->pdev);
        kfree(i7core_dev);
}

/****************************************************************************
                        Memory check routines
 ****************************************************************************/

static int get_dimm_config(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, j;
        enum edac_type mode;
        enum mem_type mtype;
        struct dimm_info *dimm;

        /* Get data from the MC register, function 0 */
        pdev = pvt->pci_mcr[0];
        if (!pdev)
                return -ENODEV;

        /* Device 3 function 0 reads */
        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

        edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
                 pvt->i7core_dev->socket, pvt->info.mc_control,
                 pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);

        if (ECC_ENABLED(pvt)) {
                edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
                if (ECCx8(pvt))
                        mode = EDAC_S8ECD8ED;
                else
                        mode = EDAC_S4ECD4ED;
        } else {
                edac_dbg(0, "ECC disabled\n");
                mode = EDAC_NONE;
        }

        /* FIXME: need to handle the error codes */
        edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
                 numdimms(pvt->info.max_dod),
                 numrank(pvt->info.max_dod >> 2),
                 numbank(pvt->info.max_dod >> 4),
                 numrow(pvt->info.max_dod >> 6),
                 numcol(pvt->info.max_dod >> 9));

        for (i = 0; i < NUM_CHANS; i++) {
                u32 data, dimm_dod[3], value[8];

                if (!pvt->pci_ch[i][0])
                        continue;

                if (!CH_ACTIVE(pvt, i)) {
                        edac_dbg(0, "Channel %i is not active\n", i);
                        continue;
                }
                if (CH_DISABLED(pvt, i)) {
                        edac_dbg(0, "Channel %i is disabled\n", i);
                        continue;
                }

                /* Devices 4-6 function 0 */
                pci_read_config_dword(pvt->pci_ch[i][0],
                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);


                if (data & THREE_DIMMS_PRESENT)
                        pvt->channel[i].is_3dimms_present = true;

                if (data & SINGLE_QUAD_RANK_PRESENT)
                        pvt->channel[i].is_single_4rank = true;

                if (data & QUAD_RANK_PRESENT)
                        pvt->channel[i].has_4rank = true;

                if (data & REGISTERED_DIMM)
                        mtype = MEM_RDDR3;
                else
                        mtype = MEM_DDR3;

                /* Devices 4-6 function 1 */
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
                pci_read_config_dword(pvt->pci_ch[i][1],
                                MC_DOD_CH_DIMM2, &dimm_dod[2]);

                edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
                         i,
                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
                         data,
                         pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
                         pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "",
                         pvt->channel[i].has_4rank ? "HAS_4R " : "",
                         (data & REGISTERED_DIMM) ? 'R' : 'U');

                for (j = 0; j < 3; j++) {
                        u32 banks, ranks, rows, cols;
                        u32 size, npages;

                        if (!DIMM_PRESENT(dimm_dod[j]))
                                continue;

                        dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
                                       i, j, 0);
                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

                        /* DDR3 has 8 I/O banks */
                        size = (rows * cols * banks * ranks) >> (20 - 3);
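                        /*
                         * Worked example for the size arithmetic: the
                         * >> (20 - 3) shift divides by 2^20 (bytes to MiB)
                         * while multiplying by the factor of 8 noted above,
                         * so e.g. 2^14 rows * 2^10 cols * 8 banks * 2 ranks
                         * = 2^28, and 2^28 >> 17 = 2048 MiB.
                         */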

                        edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
                                 j, size,
                                 RANKOFFSET(dimm_dod[j]),
                                 banks, ranks, rows, cols);

                        npages = MiB_TO_PAGES(size);

                        dimm->nr_pages = npages;

                        switch (banks) {
                        case 4:
                                dimm->dtype = DEV_X4;
                                break;
                        case 8:
                                dimm->dtype = DEV_X8;
                                break;
                        case 16:
                                dimm->dtype = DEV_X16;
                                break;
                        default:
                                dimm->dtype = DEV_UNKNOWN;
                        }

                        snprintf(dimm->label, sizeof(dimm->label),
                                 "CPU#%uChannel#%u_DIMM#%u",
                                 pvt->i7core_dev->socket, i, j);
                        dimm->grain = 8;
                        dimm->edac_mode = mode;
                        dimm->mtype = mtype;
                }

                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
                edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
                for (j = 0; j < 8; j++)
                        edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
                                 (value[j] >> 27) & 0x1,
                                 (value[j] >> 24) & 0x7,
                                 (value[j] & ((1 << 24) - 1)));
        }

        return 0;
}

/****************************************************************************
                        Error insertion routines
 ****************************************************************************/

#define to_mci(k) container_of(k, struct mem_ctl_info, dev)

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error injection
   on more than one channel.
   Also, since a change to an inject parameter is only applied at enable time,
   we disable error injection on all write calls to the sysfs nodes that
   control the error code injection.
 */
static int disable_inject(const struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        pvt->inject.enable = 0;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return -ENODEV;

        pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                                MC_CHANNEL_ERROR_INJECT, 0);

        return 0;
}

/*
 * i7core inject.section
 *
 *      accept and store error injection inject.section value
 *      bit 0 - refers to the lower 32-byte half cacheline
 *      bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct device *dev,
                                           struct device_attribute *mattr,
                                           const char *data, size_t count)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = kstrtoul(data, 10, &value);
        if ((rc < 0) || (value > 3))
                return -EIO;

        pvt->inject.section = (u32) value;
        return count;
}

static ssize_t i7core_inject_section_show(struct device *dev,
                                          struct device_attribute *mattr,
                                          char *data)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *      accept and store error injection inject.type value
 *      bit 0 - repeat enable - Enable error repetition
 *      bit 1 - inject ECC error
 *      bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct device *dev,
                                        struct device_attribute *mattr,
                                        const char *data, size_t count)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = kstrtoul(data, 10, &value);
        if ((rc < 0) || (value > 7))
                return -EIO;

        pvt->inject.type = (u32) value;
        return count;
}

static ssize_t i7core_inject_type_show(struct device *dev,
                                       struct device_attribute *mattr,
                                       char *data)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;

        return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
static ssize_t i7core_inject_eccmask_store(struct device *dev,
                                           struct device_attribute *mattr,
                                           const char *data, size_t count)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        unsigned long value;
        int rc;

        if (pvt->inject.enable)
                disable_inject(mci);

        rc = kstrtoul(data, 10, &value);
        if (rc < 0)
                return -EIO;

        pvt->inject.eccmask = (u32) value;
        return count;
}

static ssize_t i7core_inject_eccmask_show(struct device *dev,
                                          struct device_attribute *mattr,
                                          char *data)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;

        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Address-match attributes for error injection: each of the channel, dimm,
 * rank, bank, page and col attributes below selects what an injected error
 * must match. Writing "any" (stored internally as -1) makes the memory
 * controller ignore that field when matching.
 */

#define DECLARE_ADDR_MATCH(param, limit)                        \
static ssize_t i7core_inject_store_##param(                     \
        struct device *dev,                                     \
        struct device_attribute *mattr,                         \
        const char *data, size_t count)                         \
{                                                               \
        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
        struct i7core_pvt *pvt;                                 \
        long value;                                             \
        int rc;                                                 \
                                                                \
        edac_dbg(1, "\n");                                      \
        pvt = mci->pvt_info;                                    \
                                                                \
        if (pvt->inject.enable)                                 \
                disable_inject(mci);                            \
                                                                \
        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
                value = -1;                                     \
        else {                                                  \
                rc = kstrtoul(data, 10, &value);                \
                if ((rc < 0) || (value >= limit))               \
                        return -EIO;                            \
        }                                                       \
                                                                \
        pvt->inject.param = value;                              \
                                                                \
        return count;                                           \
}                                                               \
                                                                \
static ssize_t i7core_inject_show_##param(                      \
        struct device *dev,                                     \
        struct device_attribute *mattr,                         \
        char *data)                                             \
{                                                               \
        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
        struct i7core_pvt *pvt;                                 \
                                                                \
        pvt = mci->pvt_info;                                    \
        edac_dbg(1, "pvt=%p\n", pvt);                           \
        if (pvt->inject.param < 0)                              \
                return sprintf(data, "any\n");                  \
        else                                                    \
                return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)                                  \
        static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,            \
                    i7core_inject_show_##param,                 \
                    i7core_inject_store_##param)

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

ATTR_ADDR_MATCH(channel);
ATTR_ADDR_MATCH(dimm);
ATTR_ADDR_MATCH(rank);
ATTR_ADDR_MATCH(bank);
ATTR_ADDR_MATCH(page);
ATTR_ADDR_MATCH(col);
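
/*
 * Example use from user space (paths assume the first EDAC controller is
 * mc0; adjust to the actual mci node on your system):
 *
 *   # match channel 1, any DIMM, for subsequent injections:
 *   echo 1   > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *   echo any > /sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
 */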

static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
{
        u32 read;
        int count;

        edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                 where, val);

        for (count = 0; count < 10; count++) {
                if (count)
                        msleep(100);
                pci_write_config_dword(dev, where, val);
                pci_read_config_dword(dev, where, &read);

                if (read == val)
                        return 0;
        }

        i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
                "write=%08x. Read=%08x\n",
                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
                where, val, read);

        return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * Note that the error will only happen after a write operation
 * on memory that matches the condition. If REPEAT_EN is not enabled in the
 * inject mask, it will produce just one error. Otherwise, it will repeat
 * until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 *    is reliable enough to check whether the MC is using
 *    three channels. However, this is not clear from the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct device *dev,
                                          struct device_attribute *mattr,
                                          const char *data, size_t count)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;
        u64 mask = 0;
        int  rc;
        unsigned long enable;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        rc = kstrtoul(data, 10, &enable);
        if (rc < 0)
                return 0;

        if (enable) {
                pvt->inject.enable = 1;
        } else {
                disable_inject(mci);
                return count;
        }

        /* Sets pvt->inject.dimm mask */
        if (pvt->inject.dimm < 0)
                mask |= 1LL << 41;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.dimm & 0x3LL) << 35;
                else
                        mask |= (pvt->inject.dimm & 0x1LL) << 36;
        }

        /* Sets pvt->inject.rank mask */
        if (pvt->inject.rank < 0)
                mask |= 1LL << 40;
        else {
                if (pvt->channel[pvt->inject.channel].dimms > 2)
                        mask |= (pvt->inject.rank & 0x1LL) << 34;
                else
                        mask |= (pvt->inject.rank & 0x3LL) << 34;
        }

        /* Sets pvt->inject.bank mask */
        if (pvt->inject.bank < 0)
                mask |= 1LL << 39;
        else
                mask |= (pvt->inject.bank & 0x15LL) << 30;

        /* Sets pvt->inject.page mask */
        if (pvt->inject.page < 0)
                mask |= 1LL << 38;
        else
                mask |= (pvt->inject.page & 0xffff) << 14;

        /* Sets pvt->inject.column mask */
        if (pvt->inject.col < 0)
                mask |= 1LL << 37;
        else
                mask |= (pvt->inject.col & 0x3fff);

        /*
         * bit    0: REPEAT_EN
         * bits 1-2: MASK_HALF_CACHELINE
         * bit    3: INJECT_ECC
         * bit    4: INJECT_ADDR_PARITY
         */

        injectmask = (pvt->inject.type & 1) |
                     (pvt->inject.section & 0x3) << 1 |
                     (pvt->inject.type & 0x6) << (3 - 1);
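
        /*
         * Worked example: inject.type == 3 (repeat + ECC) with
         * inject.section == 0 yields injectmask = 1 | 0 | (2 << 2) = 0x09,
         * i.e. REPEAT_EN | INJECT_ECC.
         */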

        /* Unlock writes to registers - this register is write only */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 0x2);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH, mask);
        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, injectmask);

        /*
         * This is something undocumented; based on my tests, without
         * writing 8 to this register, errors aren't injected. Not sure why.
         */
        pci_write_config_dword(pvt->pci_noncore,
                               MC_CFG_CONTROL, 8);

        edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
                 mask, pvt->inject.eccmask, injectmask);


        return count;
}

static ssize_t i7core_inject_enable_show(struct device *dev,
                                         struct device_attribute *mattr,
                                         char *data)
{
        struct mem_ctl_info *mci = to_mci(dev);
        struct i7core_pvt *pvt = mci->pvt_info;
        u32 injectmask;

        if (!pvt->pci_ch[pvt->inject.channel][0])
                return 0;

        pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
                               MC_CHANNEL_ERROR_INJECT, &injectmask);

        edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);

        if (injectmask & 0x0c)
                pvt->inject.enable = 1;

        return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
        struct device *dev,                                     \
        struct device_attribute *mattr,                         \
        char *data)                                             \
{                                                               \
        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        edac_dbg(1, "\n");                                      \
        if (!pvt->ce_count_available || (pvt->is_registered))   \
                return sprintf(data, "data unavailable\n");     \
        return sprintf(data, "%lu\n",                           \
                        pvt->udimm_ce_count[param]);            \
}

#define ATTR_COUNTER(param)                                     \
        static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR,     \
                    i7core_show_counter_##param,                \
                    NULL)

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

ATTR_COUNTER(0);
ATTR_COUNTER(1);
ATTR_COUNTER(2);
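
/*
 * These per-UDIMM counters are exported through the all_channel_counts
 * sysfs node created below, e.g. (path assuming the controller is mc0):
 *
 *   cat /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
 *
 * They are only meaningful for unregistered DIMMs; on RDIMM setups the
 * node is not created (see i7core_create_sysfs_devices()).
 */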

/*
 * inject_addrmatch device sysfs struct
 */

static struct attribute *i7core_addrmatch_attrs[] = {
        &dev_attr_channel.attr,
        &dev_attr_dimm.attr,
        &dev_attr_rank.attr,
        &dev_attr_bank.attr,
        &dev_attr_page.attr,
        &dev_attr_col.attr,
        NULL
};

static const struct attribute_group addrmatch_grp = {
        .attrs  = i7core_addrmatch_attrs,
};

static const struct attribute_group *addrmatch_groups[] = {
        &addrmatch_grp,
        NULL
};

static void addrmatch_release(struct device *device)
{
        edac_dbg(1, "Releasing device %s\n", dev_name(device));
        kfree(device);
}

static const struct device_type addrmatch_type = {
        .groups         = addrmatch_groups,
        .release        = addrmatch_release,
};

/*
 * all_channel_counts sysfs struct
 */

static struct attribute *i7core_udimm_counters_attrs[] = {
        &dev_attr_udimm0.attr,
        &dev_attr_udimm1.attr,
        &dev_attr_udimm2.attr,
        NULL
};

static const struct attribute_group all_channel_counts_grp = {
        .attrs  = i7core_udimm_counters_attrs,
};

static const struct attribute_group *all_channel_counts_groups[] = {
        &all_channel_counts_grp,
        NULL
};

static void all_channel_counts_release(struct device *device)
{
        edac_dbg(1, "Releasing device %s\n", dev_name(device));
        kfree(device);
}

static const struct device_type all_channel_counts_type = {
        .groups         = all_channel_counts_groups,
        .release        = all_channel_counts_release,
};

/*
 * inject sysfs attributes
 */

static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
                   i7core_inject_section_show, i7core_inject_section_store);

static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
                   i7core_inject_type_show, i7core_inject_type_store);


static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
                   i7core_inject_eccmask_show, i7core_inject_eccmask_store);

static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
                   i7core_inject_enable_show, i7core_inject_enable_store);

static struct attribute *i7core_dev_attrs[] = {
        &dev_attr_inject_section.attr,
        &dev_attr_inject_type.attr,
        &dev_attr_inject_eccmask.attr,
        &dev_attr_inject_enable.attr,
        NULL
};

ATTRIBUTE_GROUPS(i7core_dev);

static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        int rc;

        pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
        if (!pvt->addrmatch_dev)
                return -ENOMEM;

        pvt->addrmatch_dev->type = &addrmatch_type;
        pvt->addrmatch_dev->bus = mci->dev.bus;
        device_initialize(pvt->addrmatch_dev);
        pvt->addrmatch_dev->parent = &mci->dev;
        dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
        dev_set_drvdata(pvt->addrmatch_dev, mci);

        edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));

        rc = device_add(pvt->addrmatch_dev);
        if (rc < 0)
                goto err_put_addrmatch;

        if (!pvt->is_registered) {
                pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
                                              GFP_KERNEL);
                if (!pvt->chancounts_dev) {
                        rc = -ENOMEM;
                        goto err_del_addrmatch;
                }

                pvt->chancounts_dev->type = &all_channel_counts_type;
                pvt->chancounts_dev->bus = mci->dev.bus;
                device_initialize(pvt->chancounts_dev);
                pvt->chancounts_dev->parent = &mci->dev;
                dev_set_name(pvt->chancounts_dev, "all_channel_counts");
                dev_set_drvdata(pvt->chancounts_dev, mci);

                edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));

                rc = device_add(pvt->chancounts_dev);
                if (rc < 0)
                        goto err_put_chancounts;
        }
        return 0;

err_put_chancounts:
        put_device(pvt->chancounts_dev);
err_del_addrmatch:
        device_del(pvt->addrmatch_dev);
err_put_addrmatch:
        put_device(pvt->addrmatch_dev);

        return rc;
}

static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
{
        struct i7core_pvt *pvt = mci->pvt_info;

        edac_dbg(1, "\n");

        if (!pvt->is_registered) {
                device_del(pvt->chancounts_dev);
                put_device(pvt->chancounts_dev);
        }
        device_del(pvt->addrmatch_dev);
        put_device(pvt->addrmatch_dev);
}

/****************************************************************************
        Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *      i7core_put_all_devices  'put' all the devices that we have
 *                              reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
        int i;

        edac_dbg(0, "\n");
        for (i = 0; i < i7core_dev->n_devs; i++) {
                struct pci_dev *pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;
                edac_dbg(0, "Removing dev %02x:%02x.%d\n",
                         pdev->bus->number,
                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
                pci_dev_put(pdev);
        }
}

static void i7core_put_all_devices(void)
{
        struct i7core_dev *i7core_dev, *tmp;

        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
                i7core_put_devices(i7core_dev);
                free_i7core_dev(i7core_dev);
        }
}

static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
{
        struct pci_dev *pdev = NULL;
        int i;

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core PCI buses
         * aren't announced by ACPI, so we need to use legacy bus scanning
         * to detect them.
         */
        while (table && table->descr) {
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
                if (unlikely(!pdev)) {
                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
                                pcibios_scan_specific_bus(255-i);
                }
                pci_dev_put(pdev);
                table++;
        }
}

static unsigned i7core_pci_lastbus(void)
{
        int last_bus = 0, bus;
        struct pci_bus *b = NULL;

        while ((b = pci_find_next_bus(b)) != NULL) {
                bus = b->number;
                edac_dbg(0, "Found bus %d\n", bus);
                if (bus > last_bus)
                        last_bus = bus;
        }

        edac_dbg(0, "Last bus %d\n", last_bus);

        return last_bus;
}

/*
 *      i7core_get_onedevice()  Find and perform 'get' operation on one of
 *                              the MCH's device/functions we want to
 *                              reference for this driver
 */
static int i7core_get_onedevice(struct pci_dev **prev,
                                const struct pci_id_table *table,
                                const unsigned devno,
                                const unsigned last_bus)
{
        struct i7core_dev *i7core_dev;
        const struct pci_id_descr *dev_descr = &table->descr[devno];

        struct pci_dev *pdev = NULL;
        u8 bus = 0;
        u8 socket = 0;

        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                              dev_descr->dev_id, *prev);

        /*
         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
         * are at PCI ID 8086:2c40 instead of 8086:2c41, so we need
         * to probe for the alternate address in case of failure
         */
        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
                pci_dev_get(*prev);     /* pci_get_device will put it */
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
        }

        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
            !pdev) {
                pci_dev_get(*prev);     /* pci_get_device will put it */
                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
                                      *prev);
        }

        if (!pdev) {
                if (*prev) {
                        *prev = pdev;
                        return 0;
                }

                if (dev_descr->optional)
                        return 0;

                if (devno == 0)
                        return -ENODEV;

                i7core_printk(KERN_INFO,
                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
                        dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

                /* End of list, leave */
                return -ENODEV;
        }
        bus = pdev->bus->number;

        socket = last_bus - bus;
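
        /*
         * Socket derivation: the per-socket non-core buses are numbered
         * downwards from the last PCI bus (e.g. 255 and 254 on a two-socket
         * Nehalem-EP), so the distance from the last bus gives the socket
         * index (255 -> 0, 254 -> 1).
         */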

        i7core_dev = get_i7core_dev(socket);
        if (!i7core_dev) {
                i7core_dev = alloc_i7core_dev(socket, table);
                if (!i7core_dev) {
                        pci_dev_put(pdev);
                        return -ENOMEM;
                }
        }

        if (i7core_dev->pdev[devno]) {
                i7core_printk(KERN_ERR,
                        "Duplicated device for "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                pci_dev_put(pdev);
                return -ENODEV;
        }

        i7core_dev->pdev[devno] = pdev;

        /* Sanity check */
        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
                i7core_printk(KERN_ERR,
                        "Device PCI ID %04x:%04x "
                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                        bus, dev_descr->dev, dev_descr->func);
                return -ENODEV;
        }

        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                i7core_printk(KERN_ERR,
                        "Couldn't enable "
                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
                        bus, dev_descr->dev, dev_descr->func,
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
                return -ENODEV;
        }

        edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
                 socket, bus, dev_descr->dev,
                 dev_descr->func,
                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

        /*
         * As stated in drivers/pci/search.c, the reference count for
         * @from is always decremented if it is not %NULL. So, since we need
         * to walk all devices up to the NULL terminator, we take an extra
         * reference on the device here.
         */
        pci_dev_get(pdev);

        *prev = pdev;

        return 0;
}

static int i7core_get_all_devices(void)
{
        int i, rc, last_bus;
        struct pci_dev *pdev = NULL;
        const struct pci_id_table *table = pci_dev_table;

        last_bus = i7core_pci_lastbus();

        while (table && table->descr) {
                for (i = 0; i < table->n_devs; i++) {
                        pdev = NULL;
                        do {
                                rc = i7core_get_onedevice(&pdev, table, i,
                                                          last_bus);
                                if (rc < 0) {
                                        if (i == 0) {
                                                i = table->n_devs;
                                                break;
                                        }
                                        i7core_put_all_devices();
                                        return -ENODEV;
                                }
                        } while (pdev);
                }
                table++;
        }

        return 0;
}

static int mci_bind_devs(struct mem_ctl_info *mci,
                         struct i7core_dev *i7core_dev)
{
        struct i7core_pvt *pvt = mci->pvt_info;
        struct pci_dev *pdev;
        int i, func, slot;
        char *family;

        pvt->is_registered = false;
        pvt->enable_scrub  = false;
        for (i = 0; i < i7core_dev->n_devs; i++) {
                pdev = i7core_dev->pdev[i];
                if (!pdev)
                        continue;

                func = PCI_FUNC(pdev->devfn);
                slot = PCI_SLOT(pdev->devfn);
                if (slot == 3) {
                        if (unlikely(func > MAX_MCR_FUNC))
                                goto error;
                        pvt->pci_mcr[func] = pdev;
                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
                        if (unlikely(func > MAX_CHAN_FUNC))
                                goto error;
                        pvt->pci_ch[slot - 4][func] = pdev;
                } else if (!slot && !func) {
                        pvt->pci_noncore = pdev;

                        /* Detect the processor family */
                        switch (pdev->device) {
                        case PCI_DEVICE_ID_INTEL_I7_NONCORE:
                                family = "Xeon 35xx/ i7core";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
                                family = "i7-800/i5-700";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
                                family = "Xeon 34xx";
                                pvt->enable_scrub = false;
                                break;
                        case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
                                family = "Xeon 55xx";
                                pvt->enable_scrub = true;
                                break;
                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
                                family = "Xeon 56xx / i7-900";
                                pvt->enable_scrub = true;
                                break;
                        default:
                                family = "unknown";
                                pvt->enable_scrub = false;
                        }
1507                         edac_dbg(0, "Detected a processor type %s\n", family);
1508                 } else
1509                         goto error;
1510
1511                 edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1512                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1513                          pdev, i7core_dev->socket);
1514
1515                 if (PCI_SLOT(pdev->devfn) == 3 &&
1516                         PCI_FUNC(pdev->devfn) == 2)
1517                         pvt->is_registered = true;
1518         }
1519
1520         return 0;
1521
1522 error:
1523         i7core_printk(KERN_ERR, "Device %d, function %d "
1524                       "is out of the expected range\n",
1525                       slot, func);
1526         return -EINVAL;
1527 }
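
/*
 * To make the slot/function mapping above concrete: a device at 04.2
 * (slot 4, function 2) ends up in pvt->pci_ch[0][2], one at 03.1 in
 * pvt->pci_mcr[1], and the 00.0 device becomes pvt->pci_noncore.
 */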
1528
1529 /****************************************************************************
1530                         Error check routines
1531  ****************************************************************************/
1532
1533 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1534                                          const int chan,
1535                                          const int new0,
1536                                          const int new1,
1537                                          const int new2)
1538 {
1539         struct i7core_pvt *pvt = mci->pvt_info;
1540         int add0 = 0, add1 = 0, add2 = 0;
1541         /* Update the CE counters if this is not the first time here */
1542         if (pvt->ce_count_available) {
1543                 /* Compute the deltas against the last read values */
1544
1545                 add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1546                 add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1547                 add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1548
1549                 if (add2 < 0)
1550                         add2 += 0x7fff;
1551                 pvt->rdimm_ce_count[chan][2] += add2;
1552
1553                 if (add1 < 0)
1554                         add1 += 0x7fff;
1555                 pvt->rdimm_ce_count[chan][1] += add1;
1556
1557                 if (add0 < 0)
1558                         add0 += 0x7fff;
1559                 pvt->rdimm_ce_count[chan][0] += add0;
1560         } else
1561                 pvt->ce_count_available = 1;
1562
1563         /* Store the new values */
1564         pvt->rdimm_last_ce_count[chan][2] = new2;
1565         pvt->rdimm_last_ce_count[chan][1] = new1;
1566         pvt->rdimm_last_ce_count[chan][0] = new0;
1567
1568         /* Update the EDAC core */
1569         if (add0 != 0)
1570                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1571                                      0, 0, 0,
1572                                      chan, 0, -1, "error", "");
1573         if (add1 != 0)
1574                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1575                                      0, 0, 0,
1576                                      chan, 1, -1, "error", "");
1577         if (add2 != 0)
1578                 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1579                                      0, 0, 0,
1580                                      chan, 2, -1, "error", "");
1581 }
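
/*
 * Worked example of the wraparound fixup above, assuming the counters
 * wrap past 0x7fff (they are masked to 15 bits): if the last read was
 * 0x7ffe and the new read is 0x0003, the raw delta is -0x7ffb; adding
 * back 0x7fff reports the 4 corrected errors that occurred in between.
 */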
1582
1583 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1584 {
1585         struct i7core_pvt *pvt = mci->pvt_info;
1586         u32 rcv[3][2];
1587         int i, new0, new1, new2;
1588
1589         /* Read Dev 3 Fn 2 MC_COR_ECC_CNT registers directly */
1590         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1591                                                                 &rcv[0][0]);
1592         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1593                                                                 &rcv[0][1]);
1594         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1595                                                                 &rcv[1][0]);
1596         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1597                                                                 &rcv[1][1]);
1598         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1599                                                                 &rcv[2][0]);
1600         pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1601                                                                 &rcv[2][1]);
1602         for (i = 0; i < 3; i++) {
1603                 edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1604                          (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1605                 /* If the channel has 3 DIMMs */
1606                 if (pvt->channel[i].dimms > 2) {
1607                         new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1608                         new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1609                         new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1610                 } else {
1611                         new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1612                                         DIMM_BOT_COR_ERR(rcv[i][0]);
1613                         new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1614                                         DIMM_BOT_COR_ERR(rcv[i][1]);
1615                         new2 = 0;
1616                 }
1617
1618                 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1619         }
1620 }
1621
1622 /* This function is based on the device 3 function 4 registers as described on:
1623  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1624  *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1625  * also available at:
1626  *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1627  */
1628 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1629 {
1630         struct i7core_pvt *pvt = mci->pvt_info;
1631         u32 rcv1, rcv0;
1632         int new0, new1, new2;
1633
1634         if (!pvt->pci_mcr[4]) {
1635                 edac_dbg(0, "MCR registers not found\n");
1636                 return;
1637         }
1638
1639         /* Corrected test errors */
1640         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1641         pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1642
1643         /* Store the new values */
1644         new2 = DIMM2_COR_ERR(rcv1);
1645         new1 = DIMM1_COR_ERR(rcv0);
1646         new0 = DIMM0_COR_ERR(rcv0);
1647
1648         /* Update the CE counters if this is not the first time here */
1649         if (pvt->ce_count_available) {
1650                 /* Compute the deltas against the last read values */
1651                 int add0, add1, add2;
1652
1653                 add2 = new2 - pvt->udimm_last_ce_count[2];
1654                 add1 = new1 - pvt->udimm_last_ce_count[1];
1655                 add0 = new0 - pvt->udimm_last_ce_count[0];
1656
1657                 if (add2 < 0)
1658                         add2 += 0x7fff;
1659                 pvt->udimm_ce_count[2] += add2;
1660
1661                 if (add1 < 0)
1662                         add1 += 0x7fff;
1663                 pvt->udimm_ce_count[1] += add1;
1664
1665                 if (add0 < 0)
1666                         add0 += 0x7fff;
1667                 pvt->udimm_ce_count[0] += add0;
1668
1669                 if (add0 | add1 | add2)
1670                         i7core_printk(KERN_ERR, "New Corrected error(s): "
1671                                       "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1672                                       add0, add1, add2);
1673         } else
1674                 pvt->ce_count_available = 1;
1675
1676         /* Store the new values */
1677         pvt->udimm_last_ce_count[2] = new2;
1678         pvt->udimm_last_ce_count[1] = new1;
1679         pvt->udimm_last_ce_count[0] = new0;
1680 }
1681
1682 /*
1683  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and
1684  * IA-32 Architectures Software Developer’s Manual Volume 3B, Nehalem is
1685  * defined as family 0x06, model 0x1a.
1686  *
1687  * The MCA registers used here are the following ones:
1688  *     struct mce field MCA Register
1689  *     m->status        MSR_IA32_MC8_STATUS
1690  *     m->addr          MSR_IA32_MC8_ADDR
1691  *     m->misc          MSR_IA32_MC8_MISC
1692  * In the case of Nehalem, the error information is encoded in the .status
1693  * and .misc fields.
1694  */
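/*
 * Worked example of the extraction below, with a hypothetical
 * m->misc = 0x00040000: dimm = (0x40000 >> 16) & 0x3 = 0 and
 * channel = (0x40000 >> 18) & 0x3 = 1, i.e. DIMM 0 on channel 1.
 */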
1695 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1696                                     const struct mce *m)
1697 {
1698         struct i7core_pvt *pvt = mci->pvt_info;
1699         char *optype, *err;
1700         enum hw_event_mc_err_type tp_event;
1701         unsigned long error = m->status & 0x1ff0000l;
1702         bool uncorrected_error = m->mcgstatus & 1ll << 61;
1703         bool ripv = m->mcgstatus & 1;
1704         u32 optypenum = (m->status >> 4) & 0x07;
1705         u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1706         u32 dimm = (m->misc >> 16) & 0x3;
1707         u32 channel = (m->misc >> 18) & 0x3;
1708         u32 syndrome = m->misc >> 32;
1709         u32 errnum = find_first_bit(&error, 32);
1710
1711         if (uncorrected_error) {
1712                 core_err_cnt = 1;
1713                 if (ripv)
1714                         tp_event = HW_EVENT_ERR_FATAL;
1715                 else
1716                         tp_event = HW_EVENT_ERR_UNCORRECTED;
1717         } else {
1718                 tp_event = HW_EVENT_ERR_CORRECTED;
1719         }
1720
1721         switch (optypenum) {
1722         case 0:
1723                 optype = "generic undef request";
1724                 break;
1725         case 1:
1726                 optype = "read error";
1727                 break;
1728         case 2:
1729                 optype = "write error";
1730                 break;
1731         case 3:
1732                 optype = "addr/cmd error";
1733                 break;
1734         case 4:
1735                 optype = "scrubbing error";
1736                 break;
1737         default:
1738                 optype = "reserved";
1739                 break;
1740         }
1741
1742         switch (errnum) {
1743         case 16:
1744                 err = "read ECC error";
1745                 break;
1746         case 17:
1747                 err = "RAS ECC error";
1748                 break;
1749         case 18:
1750                 err = "write parity error";
1751                 break;
1752         case 19:
1753                 err = "redundancy loss";
1754                 break;
1755         case 20:
1756                 err = "reserved";
1757                 break;
1758         case 21:
1759                 err = "memory range error";
1760                 break;
1761         case 22:
1762                 err = "RTID out of range";
1763                 break;
1764         case 23:
1765                 err = "address parity error";
1766                 break;
1767         case 24:
1768                 err = "byte enable parity error";
1769                 break;
1770         default:
1771                 err = "unknown";
1772         }
1773
1774         /*
1775          * Call the helper to output message
1776          * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1777          * only one event
1778          */
1779         if (uncorrected_error || !pvt->is_registered)
1780                 edac_mc_handle_error(tp_event, mci, core_err_cnt,
1781                                      m->addr >> PAGE_SHIFT,
1782                                      m->addr & ~PAGE_MASK,
1783                                      syndrome,
1784                                      channel, dimm, -1,
1785                                      err, optype);
1786 }
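
/*
 * Note on the address split above: edac_mc_handle_error() takes the
 * location as a page frame number plus an offset inside the page. With
 * 4 KiB pages, a hypothetical m->addr of 0x12345678 is reported as
 * page 0x12345, offset 0x678.
 */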
1787
1788 /*
1789  *      i7core_check_error      Retrieve and process errors reported by the
1790  *                              hardware. Called from the MCE notifier chain.
1791  */
1792 static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
1793 {
1794         struct i7core_pvt *pvt = mci->pvt_info;
1795
1796         i7core_mce_output_error(mci, m);
1797
1798         /*
1799          * Now, let's increment CE error counts
1800          */
1801         if (!pvt->is_registered)
1802                 i7core_udimm_check_mc_ecc_err(mci);
1803         else
1804                 i7core_rdimm_check_mc_ecc_err(mci);
1805 }
1806
1807 /*
1808  * Check that logging is enabled and that this is the right type
1809  * of error for us to handle.
1810  */
1811 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1812                                   void *data)
1813 {
1814         struct mce *mce = (struct mce *)data;
1815         struct i7core_dev *i7_dev;
1816         struct mem_ctl_info *mci;
1817
1818         i7_dev = get_i7core_dev(mce->socketid);
1819         if (!i7_dev)
1820                 return NOTIFY_DONE;
1821
1822         mci = i7_dev->mci;
1823
1824         /*
1825          * Just let mcelog handle it if the error is
1826          * outside the memory controller
1827          */
1828         if (((mce->status & 0xffff) >> 7) != 1)
1829                 return NOTIFY_DONE;
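
        /*
         * Example of the check above: a status word with MCACOD 0x0090
         * yields (0x0090 >> 7) == 1 and is decoded here, while MCACOD
         * 0x0005 yields 0 and is left to other decoders.
         */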
1830
1831         /* Bank 8 registers are the only ones that we know how to handle */
1832         if (mce->bank != 8)
1833                 return NOTIFY_DONE;
1834
1835         i7core_check_error(mci, mce);
1836
1837         /* Advise mcelog that the errors were handled */
1838         return NOTIFY_STOP;
1839 }
1840
1841 static struct notifier_block i7_mce_dec = {
1842         .notifier_call  = i7core_mce_check_error,
1843         .priority       = MCE_PRIO_EDAC,
1844 };
1845
1846 struct memdev_dmi_entry {
1847         u8 type;
1848         u8 length;
1849         u16 handle;
1850         u16 phys_mem_array_handle;
1851         u16 mem_err_info_handle;
1852         u16 total_width;
1853         u16 data_width;
1854         u16 size;
1855         u8 form;
1856         u8 device_set;
1857         u8 device_locator;
1858         u8 bank_locator;
1859         u8 memory_type;
1860         u16 type_detail;
1861         u16 speed;
1862         u8 manufacturer;
1863         u8 serial_number;
1864         u8 asset_tag;
1865         u8 part_number;
1866         u8 attributes;
1867         u32 extended_size;
1868         u16 conf_mem_clk_speed;
1869 } __attribute__((__packed__));
1870
1871
1872 /*
1873  * Decode the DRAM clock frequency. Be paranoid: make sure that all
1874  * memory devices report the same speed; if they don't, consider
1875  * all speeds invalid.
1876  */
1877 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1878 {
1879         int *dclk_freq = _dclk_freq;
1880         u16 dmi_mem_clk_speed;
1881
1882         if (*dclk_freq == -1)
1883                 return;
1884
1885         if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1886                 struct memdev_dmi_entry *memdev_dmi_entry =
1887                         (struct memdev_dmi_entry *)dh;
1888                 unsigned long conf_mem_clk_speed_offset =
1889                         (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1890                         (unsigned long)&memdev_dmi_entry->type;
1891                 unsigned long speed_offset =
1892                         (unsigned long)&memdev_dmi_entry->speed -
1893                         (unsigned long)&memdev_dmi_entry->type;
1894
1895                 /* Check that a DIMM is present */
1896                 if (memdev_dmi_entry->size == 0)
1897                         return;
1898
1899                 /*
1900                  * Pick the configured speed if it's available; otherwise
1901                  * fall back to the DIMM speed, or give up if neither exists.
1902                  */
1903                 if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1904                         dmi_mem_clk_speed =
1905                                 memdev_dmi_entry->conf_mem_clk_speed;
1906                 } else if (memdev_dmi_entry->length > speed_offset) {
1907                         dmi_mem_clk_speed = memdev_dmi_entry->speed;
1908                 } else {
1909                         *dclk_freq = -1;
1910                         return;
1911                 }
1912
1913                 if (*dclk_freq == 0) {
1914                         /* First pass, speed was 0 */
1915                         if (dmi_mem_clk_speed > 0) {
1916                                 /* Set speed if a valid speed is read */
1917                                 *dclk_freq = dmi_mem_clk_speed;
1918                         } else {
1919                                 /* Otherwise we don't have a valid speed */
1920                                 *dclk_freq = -1;
1921                         }
1922                 } else if (*dclk_freq > 0 &&
1923                            *dclk_freq != dmi_mem_clk_speed) {
1924                         /*
1925                          * If we have a speed, check that all DIMMS are the same
1926                          * speed, otherwise set the speed as invalid.
1927                          */
1928                         *dclk_freq = -1;
1929                 }
1930         }
1931 }
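
/*
 * With the packed memdev_dmi_entry layout above, speed sits at offset
 * 21 and conf_mem_clk_speed at offset 32. The length checks therefore
 * mean that an SMBIOS 2.7-style Type 17 entry (length 0x22) supplies
 * the configured clock, while shorter legacy entries fall back to the
 * nominal DIMM speed.
 */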
1932
1933 /*
1934  * The default DCLK frequency is used as a fallback if we
1935  * fail to find anything reliable in the DMI. The value
1936  * is taken straight from the datasheet.
1937  */
1938 #define DEFAULT_DCLK_FREQ 800
1939
1940 static int get_dclk_freq(void)
1941 {
1942         int dclk_freq = 0;
1943
1944         dmi_walk(decode_dclk, (void *)&dclk_freq);
1945
1946         if (dclk_freq < 1)
1947                 return DEFAULT_DCLK_FREQ;
1948
1949         return dclk_freq;
1950 }
1951
1952 /*
1953  * set_sdram_scrub_rate         This routine sets the scrub rate, in bytes/sec,
1954  *                              in hardware according to the SCRUBINTERVAL
1955  *                              formula found in the datasheet.
1956  */
1957 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1958 {
1959         struct i7core_pvt *pvt = mci->pvt_info;
1960         struct pci_dev *pdev;
1961         u32 dw_scrub;
1962         u32 dw_ssr;
1963
1964         /* Get data from the MC register, function 2 */
1965         pdev = pvt->pci_mcr[2];
1966         if (!pdev)
1967                 return -ENODEV;
1968
1969         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1970
1971         if (new_bw == 0) {
1972                 /* Prepare to disable patrol scrub */
1973                 dw_scrub &= ~STARTSCRUB;
1974                 /* Stop the patrol scrub engine */
1975                 write_and_test(pdev, MC_SCRUB_CONTROL,
1976                                dw_scrub & ~SCRUBINTERVAL_MASK);
1977
1978                 /* Get current status of scrub rate and set bit to disable */
1979                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1980                 dw_ssr &= ~SSR_MODE_MASK;
1981                 dw_ssr |= SSR_MODE_DISABLE;
1982         } else {
1983                 const int cache_line_size = 64;
1984                 const u32 freq_dclk_mhz = pvt->dclk_freq;
1985                 unsigned long long scrub_interval;
1986                 /*
1987                  * Translate the desired scrub rate to a register value and
1988                  * program the corresponding register value.
1989                  */
1990                 scrub_interval = (unsigned long long)freq_dclk_mhz *
1991                         cache_line_size * 1000000;
1992                 do_div(scrub_interval, new_bw);
1993
1994                 if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
1995                         return -EINVAL;
1996
1997                 dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
1998
1999                 /* Start the patrol scrub engine */
2000                 pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2001                                        STARTSCRUB | dw_scrub);
2002
2003                 /* Get current status of scrub rate and set bit to enable */
2004                 pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2005                 dw_ssr &= ~SSR_MODE_MASK;
2006                 dw_ssr |= SSR_MODE_ENABLE;
2007         }
2008         /* Disable or enable scrubbing */
2009         pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2010
2011         return new_bw;
2012 }
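
/*
 * Numeric sketch of the conversion above, assuming the default 800 MHz
 * DCLK: a request of new_bw = 1000000000 bytes/sec gives
 * scrub_interval = 800 * 64 * 1000000 / 1000000000 = 51 (truncated)
 * DCLK cycles per 64-byte cache line, and get_sdram_scrub_rate() maps
 * the value 51 back to roughly 1.004e9 bytes/sec.
 */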
2013
2014 /*
2015  * get_sdram_scrub_rate         This routine converts the current scrub rate
2016  *                              value into bytes/sec bandwidth according to
2017  *                              the SCRUBINTERVAL formula found in the datasheet.
2018  */
2019 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2020 {
2021         struct i7core_pvt *pvt = mci->pvt_info;
2022         struct pci_dev *pdev;
2023         const u32 cache_line_size = 64;
2024         const u32 freq_dclk_mhz = pvt->dclk_freq;
2025         unsigned long long scrub_rate;
2026         u32 scrubval;
2027
2028         /* Get data from the MC register, function 2 */
2029         pdev = pvt->pci_mcr[2];
2030         if (!pdev)
2031                 return -ENODEV;
2032
2033         /* Get current scrub control data */
2034         pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2035
2036         /* Mask the highest 8 bits to 0 */
2037         scrubval &= SCRUBINTERVAL_MASK;
2038         if (!scrubval)
2039                 return 0;
2040
2041         /* Convert the scrub rate value into bytes/sec bandwidth */
2042         scrub_rate = (unsigned long long)freq_dclk_mhz *
2043                 1000000 * cache_line_size;
2044         do_div(scrub_rate, scrubval);
2045         return (int)scrub_rate;
2046 }
2047
2048 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2049 {
2050         struct i7core_pvt *pvt = mci->pvt_info;
2051         u32 pci_lock;
2052
2053         /* Unlock writes to pci registers */
2054         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2055         pci_lock &= ~0x3;
2056         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2057                                pci_lock | MC_CFG_UNLOCK);
2058
2059         mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2060         mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2061 }
2062
2063 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2064 {
2065         struct i7core_pvt *pvt = mci->pvt_info;
2066         u32 pci_lock;
2067
2068         /* Lock writes to pci registers */
2069         pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2070         pci_lock &= ~0x3;
2071         pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2072                                pci_lock | MC_CFG_LOCK);
2073 }
2074
2075 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2076 {
2077         pvt->i7core_pci = edac_pci_create_generic_ctl(
2078                                                 &pvt->i7core_dev->pdev[0]->dev,
2079                                                 EDAC_MOD_STR);
2080         if (unlikely(!pvt->i7core_pci))
2081                 i7core_printk(KERN_WARNING,
2082                               "Unable to setup PCI error report via EDAC\n");
2083 }
2084
2085 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2086 {
2087         if (likely(pvt->i7core_pci))
2088                 edac_pci_release_generic_ctl(pvt->i7core_pci);
2089         else
2090                 i7core_printk(KERN_ERR,
2091                                 "Couldn't find mem_ctl_info for socket %d\n",
2092                                 pvt->i7core_dev->socket);
2093         pvt->i7core_pci = NULL;
2094 }
2095
2096 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2097 {
2098         struct mem_ctl_info *mci = i7core_dev->mci;
2099         struct i7core_pvt *pvt;
2100
2101         if (unlikely(!mci || !mci->pvt_info)) {
2102                 edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2103
2104                 i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2105                 return;
2106         }
2107
2108         pvt = mci->pvt_info;
2109
2110         edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2111
2112         /* Disable scrubrate setting */
2113         if (pvt->enable_scrub)
2114                 disable_sdram_scrub_setting(mci);
2115
2116         /* Disable EDAC polling */
2117         i7core_pci_ctl_release(pvt);
2118
2119         /* Remove MC sysfs nodes */
2120         i7core_delete_sysfs_devices(mci);
2121         edac_mc_del_mc(mci->pdev);
2122
2123         edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2124         kfree(mci->ctl_name);
2125         edac_mc_free(mci);
2126         i7core_dev->mci = NULL;
2127 }
2128
2129 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2130 {
2131         struct mem_ctl_info *mci;
2132         struct i7core_pvt *pvt;
2133         int rc;
2134         struct edac_mc_layer layers[2];
2135
2136         /* allocate a new MC control structure */
2137
2138         layers[0].type = EDAC_MC_LAYER_CHANNEL;
2139         layers[0].size = NUM_CHANS;
2140         layers[0].is_virt_csrow = false;
2141         layers[1].type = EDAC_MC_LAYER_SLOT;
2142         layers[1].size = MAX_DIMMS;
2143         layers[1].is_virt_csrow = true;
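
        /*
         * The two layers describe a channel x DIMM-slot grid. The error
         * paths above fill the same grid via (channel, dimm) pairs, so
         * e.g. channel 1, slot 2 names the third DIMM on the second
         * channel.
         */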
2144         mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2145                             sizeof(*pvt));
2146         if (unlikely(!mci))
2147                 return -ENOMEM;
2148
2149         edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2150
2151         pvt = mci->pvt_info;
2152         memset(pvt, 0, sizeof(*pvt));
2153
2154         /* Associate i7core_dev and mci for future use */
2155         pvt->i7core_dev = i7core_dev;
2156         i7core_dev->mci = mci;
2157
2158         /*
2159          * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2160          * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2161          * memory channels
2162          */
2163         mci->mtype_cap = MEM_FLAG_DDR3;
2164         mci->edac_ctl_cap = EDAC_FLAG_NONE;
2165         mci->edac_cap = EDAC_FLAG_NONE;
2166         mci->mod_name = "i7core_edac.c";
2167
2168         mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", i7core_dev->socket);
2169         if (!mci->ctl_name) {
2170                 rc = -ENOMEM;
2171                 goto fail1;
2172         }
2173
2174         mci->dev_name = pci_name(i7core_dev->pdev[0]);
2175         mci->ctl_page_to_phys = NULL;
2176
2177         /* Store pci devices at mci for faster access */
2178         rc = mci_bind_devs(mci, i7core_dev);
2179         if (unlikely(rc < 0))
2180                 goto fail0;
2181
2182
2183         /* Get dimm basic config */
2184         get_dimm_config(mci);
2185         /* record ptr to the generic device */
2186         mci->pdev = &i7core_dev->pdev[0]->dev;
2187
2188         /* Enable scrubrate setting */
2189         if (pvt->enable_scrub)
2190                 enable_sdram_scrub_setting(mci);
2191
2192         /* add this new MC control structure to EDAC's list of MCs */
2193         if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
2194                 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2195                 /* FIXME: perhaps some code should go here that disables error
2196                  * reporting if we just enabled it
2197                  */
2198
2199                 rc = -EINVAL;
2200                 goto fail0;
2201         }
2202         if (i7core_create_sysfs_devices(mci)) {
2203                 edac_dbg(0, "MC: failed to create sysfs nodes\n");
2204                 edac_mc_del_mc(mci->pdev);
2205                 rc = -EINVAL;
2206                 goto fail0;
2207         }
2208
2209         /* Default error mask is any memory */
2210         pvt->inject.channel = 0;
2211         pvt->inject.dimm = -1;
2212         pvt->inject.rank = -1;
2213         pvt->inject.bank = -1;
2214         pvt->inject.page = -1;
2215         pvt->inject.col = -1;
2216
2217         /* Allocate generic PCI control info */
2218         i7core_pci_ctl_create(pvt);
2219
2220         /* DCLK for scrub rate setting */
2221         pvt->dclk_freq = get_dclk_freq();
2222
2223         return 0;
2224
2225 fail0:
2226         kfree(mci->ctl_name);
2227
2228 fail1:
2229         edac_mc_free(mci);
2230         i7core_dev->mci = NULL;
2231         return rc;
2232 }
2233
2234 /*
2235  *      i7core_probe    Probe for ONE instance of device to see if it is
2236  *                      present.
2237  *      return:
2238  *              0 if a device was found
2239  *              < 0 on error
2240  */
2241
2242 static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2243 {
2244         int rc, count = 0;
2245         struct i7core_dev *i7core_dev;
2246
2247         /* get the pci devices we want to reserve for our use */
2248         mutex_lock(&i7core_edac_lock);
2249
2250         /*
2251          * All memory controllers are allocated at the first pass.
2252          */
2253         if (unlikely(probed >= 1)) {
2254                 mutex_unlock(&i7core_edac_lock);
2255                 return -ENODEV;
2256         }
2257         probed++;
2258
2259         rc = i7core_get_all_devices();
2260         if (unlikely(rc < 0))
2261                 goto fail0;
2262
2263         list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2264                 count++;
2265                 rc = i7core_register_mci(i7core_dev);
2266                 if (unlikely(rc < 0))
2267                         goto fail1;
2268         }
2269
2270         /*
2271          * Nehalem-EX uses a different memory controller. However, as the
2272          * memory controller is not visible on some Nehalem/Nehalem-EP, we
2273          * need to probe indirectly via an X58 PCI device. The same devices
2274          * are found on (some) Nehalem-EX. So, on those machines, the
2275          * probe routine needs to return -ENODEV, as the actual Memory
2276          * Controller registers won't be detected.
2277          */
2278         if (!count) {
2279                 rc = -ENODEV;
2280                 goto fail1;
2281         }
2282
2283         i7core_printk(KERN_INFO,
2284                       "Driver loaded, %d memory controller(s) found.\n",
2285                       count);
2286
2287         mutex_unlock(&i7core_edac_lock);
2288         return 0;
2289
2290 fail1:
2291         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2292                 i7core_unregister_mci(i7core_dev);
2293
2294         i7core_put_all_devices();
2295 fail0:
2296         mutex_unlock(&i7core_edac_lock);
2297         return rc;
2298 }
2299
2300 /*
2301  *      i7core_remove   destructor for one instance of device
2302  *
2303  */
2304 static void i7core_remove(struct pci_dev *pdev)
2305 {
2306         struct i7core_dev *i7core_dev;
2307
2308         edac_dbg(0, "\n");
2309
2310         /*
2311          * There is a problem here: the pdev value used for removal will be
2312          * wrong, since it points to the X58 register used to detect that the
2313          * machine is a Nehalem or newer design. However, due to the way
2314          * several PCI devices are grouped together to provide MC
2315          * functionality, we need a different method for releasing the devices.
2316          */
2317
2318         mutex_lock(&i7core_edac_lock);
2319
2320         if (unlikely(!probed)) {
2321                 mutex_unlock(&i7core_edac_lock);
2322                 return;
2323         }
2324
2325         list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2326                 i7core_unregister_mci(i7core_dev);
2327
2328         /* Release PCI resources */
2329         i7core_put_all_devices();
2330
2331         probed--;
2332
2333         mutex_unlock(&i7core_edac_lock);
2334 }
2335
2336 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2337
2338 /*
2339  *      i7core_driver   pci_driver structure for this module
2340  *
2341  */
2342 static struct pci_driver i7core_driver = {
2343         .name     = "i7core_edac",
2344         .probe    = i7core_probe,
2345         .remove   = i7core_remove,
2346         .id_table = i7core_pci_tbl,
2347 };
2348
2349 /*
2350  *      i7core_init             Module entry function
2351  *                      Tries to initialize this module for its devices
2352  */
2353 static int __init i7core_init(void)
2354 {
2355         int pci_rc;
2356
2357         edac_dbg(2, "\n");
2358
2359         /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2360         opstate_init();
2361
2362         if (use_pci_fixup)
2363                 i7core_xeon_pci_fixup(pci_dev_table);
2364
2365         pci_rc = pci_register_driver(&i7core_driver);
2366
2367         if (pci_rc >= 0) {
2368                 mce_register_decode_chain(&i7_mce_dec);
2369                 return 0;
2370         }
2371
2372         i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2373                       pci_rc);
2374
2375         return pci_rc;
2376 }
2377
2378 /*
2379  *      i7core_exit()   Module exit function
2380  *                      Unregister the driver
2381  */
2382 static void __exit i7core_exit(void)
2383 {
2384         edac_dbg(2, "\n");
2385         pci_unregister_driver(&i7core_driver);
2386         mce_unregister_decode_chain(&i7_mce_dec);
2387 }
2388
2389 module_init(i7core_init);
2390 module_exit(i7core_exit);
2391
2392 MODULE_LICENSE("GPL");
2393 MODULE_AUTHOR("Mauro Carvalho Chehab");
2394 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2395 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2396                    I7CORE_REVISION);
2397
2398 module_param(edac_op_state, int, 0444);
2399 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");