]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/pci/pcie/err.c
62ab665f0f037af70f9e021058d2c72937b3ddb6
[linux.git] / drivers / pci / pcie / err.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *      Tom Long Nguyen (tom.l.nguyen@intel.com)
10  *      Zhang Yanmin (yanmin.zhang@intel.com)
11  */
12
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
19 #include "portdrv.h"
20 #include "../pci.h"
21
/*
 * Context handed to the report_*() callbacks while one recovery step is
 * broadcast:
 *   state  - channel state passed to drivers' error_detected handlers and
 *            stored in each visited device's error_state
 *   result - running merge (see merge_result()) of the votes returned by
 *            the drivers visited so far
 */
struct aer_broadcast_data {
        enum pci_channel_state state;
        enum pci_ers_result result;
};
26
27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28                                   enum pci_ers_result new)
29 {
30         if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31                 return PCI_ERS_RESULT_NO_AER_DRIVER;
32
33         if (new == PCI_ERS_RESULT_NONE)
34                 return orig;
35
36         switch (orig) {
37         case PCI_ERS_RESULT_CAN_RECOVER:
38         case PCI_ERS_RESULT_RECOVERED:
39                 orig = new;
40                 break;
41         case PCI_ERS_RESULT_DISCONNECT:
42                 if (new == PCI_ERS_RESULT_NEED_RESET)
43                         orig = PCI_ERS_RESULT_NEED_RESET;
44                 break;
45         default:
46                 break;
47         }
48
49         return orig;
50 }
51
52 static int report_error_detected(struct pci_dev *dev, void *data)
53 {
54         pci_ers_result_t vote;
55         const struct pci_error_handlers *err_handler;
56         struct aer_broadcast_data *result_data;
57
58         result_data = (struct aer_broadcast_data *) data;
59
60         device_lock(&dev->dev);
61         dev->error_state = result_data->state;
62
63         if (!dev->driver ||
64                 !dev->driver->err_handler ||
65                 !dev->driver->err_handler->error_detected) {
66                 if (result_data->state == pci_channel_io_frozen &&
67                         dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
68                         /*
69                          * In case of fatal recovery, if one of down-
70                          * stream device has no driver. We might be
71                          * unable to recover because a later insmod
72                          * of a driver for this device is unaware of
73                          * its hw state.
74                          */
75                         pci_printk(KERN_DEBUG, dev, "device has %s\n",
76                                    dev->driver ?
77                                    "no AER-aware driver" : "no driver");
78                 }
79
80                 /*
81                  * If there's any device in the subtree that does not
82                  * have an error_detected callback, returning
83                  * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
84                  * the subsequent mmio_enabled/slot_reset/resume
85                  * callbacks of "any" device in the subtree. All the
86                  * devices in the subtree are left in the error state
87                  * without recovery.
88                  */
89
90                 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
91                         vote = PCI_ERS_RESULT_NO_AER_DRIVER;
92                 else
93                         vote = PCI_ERS_RESULT_NONE;
94         } else {
95                 err_handler = dev->driver->err_handler;
96                 vote = err_handler->error_detected(dev, result_data->state);
97                 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
98         }
99
100         result_data->result = merge_result(result_data->result, vote);
101         device_unlock(&dev->dev);
102         return 0;
103 }
104
105 static int report_mmio_enabled(struct pci_dev *dev, void *data)
106 {
107         pci_ers_result_t vote;
108         const struct pci_error_handlers *err_handler;
109         struct aer_broadcast_data *result_data;
110
111         result_data = (struct aer_broadcast_data *) data;
112
113         device_lock(&dev->dev);
114         if (!dev->driver ||
115                 !dev->driver->err_handler ||
116                 !dev->driver->err_handler->mmio_enabled)
117                 goto out;
118
119         err_handler = dev->driver->err_handler;
120         vote = err_handler->mmio_enabled(dev);
121         result_data->result = merge_result(result_data->result, vote);
122 out:
123         device_unlock(&dev->dev);
124         return 0;
125 }
126
127 static int report_slot_reset(struct pci_dev *dev, void *data)
128 {
129         pci_ers_result_t vote;
130         const struct pci_error_handlers *err_handler;
131         struct aer_broadcast_data *result_data;
132
133         result_data = (struct aer_broadcast_data *) data;
134
135         device_lock(&dev->dev);
136         if (!dev->driver ||
137                 !dev->driver->err_handler ||
138                 !dev->driver->err_handler->slot_reset)
139                 goto out;
140
141         err_handler = dev->driver->err_handler;
142         vote = err_handler->slot_reset(dev);
143         result_data->result = merge_result(result_data->result, vote);
144 out:
145         device_unlock(&dev->dev);
146         return 0;
147 }
148
149 static int report_resume(struct pci_dev *dev, void *data)
150 {
151         const struct pci_error_handlers *err_handler;
152
153         device_lock(&dev->dev);
154         dev->error_state = pci_channel_io_normal;
155
156         if (!dev->driver ||
157                 !dev->driver->err_handler ||
158                 !dev->driver->err_handler->resume)
159                 goto out;
160
161         err_handler = dev->driver->err_handler;
162         err_handler->resume(dev);
163         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
164 out:
165         device_unlock(&dev->dev);
166         return 0;
167 }
168
169 /**
170  * default_reset_link - default reset function
171  * @dev: pointer to pci_dev data structure
172  *
173  * Invoked when performing link reset on a Downstream Port or a
174  * Root Port with no aer driver.
175  */
176 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
177 {
178         int rc;
179
180         rc = pci_bus_error_reset(dev);
181         pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
182         return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
183 }
184
185 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
186 {
187         struct pci_dev *udev;
188         pci_ers_result_t status;
189         struct pcie_port_service_driver *driver = NULL;
190
191         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
192                 /* Reset this port for all subordinates */
193                 udev = dev;
194         } else {
195                 /* Reset the upstream component (likely downstream port) */
196                 udev = dev->bus->self;
197         }
198
199         /* Use the aer driver of the component firstly */
200         driver = pcie_port_find_service(udev, service);
201
202         if (driver && driver->reset_link) {
203                 status = driver->reset_link(udev);
204         } else if (udev->has_secondary_link) {
205                 status = default_reset_link(udev);
206         } else {
207                 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
208                         pci_name(udev));
209                 return PCI_ERS_RESULT_DISCONNECT;
210         }
211
212         if (status != PCI_ERS_RESULT_RECOVERED) {
213                 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
214                         pci_name(udev));
215                 return PCI_ERS_RESULT_DISCONNECT;
216         }
217
218         return status;
219 }
220
221 /**
222  * broadcast_error_message - handle message broadcast to downstream drivers
223  * @dev: pointer to from where in a hierarchy message is broadcasted down
224  * @state: error state
225  * @error_mesg: message to print
226  * @cb: callback to be broadcasted
227  *
228  * Invoked during error recovery process. Once being invoked, the content
229  * of error severity will be broadcasted to all downstream drivers in a
230  * hierarchy in question.
231  */
232 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
233         enum pci_channel_state state,
234         char *error_mesg,
235         int (*cb)(struct pci_dev *, void *))
236 {
237         struct aer_broadcast_data result_data;
238
239         pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
240         result_data.state = state;
241         if (cb == report_error_detected)
242                 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
243         else
244                 result_data.result = PCI_ERS_RESULT_RECOVERED;
245
246         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
247                 /*
248                  * If the error is reported by a bridge, we think this error
249                  * is related to the downstream link of the bridge, so we
250                  * do error recovery on all subordinates of the bridge instead
251                  * of the bridge and clear the error status of the bridge.
252                  */
253                 if (cb == report_error_detected)
254                         dev->error_state = state;
255                 pci_walk_bus(dev->subordinate, cb, &result_data);
256                 if (cb == report_resume) {
257                         pci_aer_clear_device_status(dev);
258                         pci_cleanup_aer_uncorrect_error_status(dev);
259                         dev->error_state = pci_channel_io_normal;
260                 }
261         } else {
262                 /*
263                  * If the error is reported by an end point, we think this
264                  * error is related to the upstream link of the end point.
265                  * The error is non fatal so the bus is ok; just invoke
266                  * the callback for the function that logged the error.
267                  */
268                 cb(dev, &result_data);
269         }
270
271         return result_data.result;
272 }
273
274 /**
275  * pcie_do_fatal_recovery - handle fatal error recovery process
276  * @dev: pointer to a pci_dev data structure of agent detecting an error
277  *
278  * Invoked when an error is fatal. Once being invoked, removes the devices
279  * beneath this AER agent, followed by reset link e.g. secondary bus reset
280  * followed by re-enumeration of devices.
281  */
282 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
283 {
284         struct pci_dev *udev;
285         struct pci_bus *parent;
286         struct pci_dev *pdev, *temp;
287         pci_ers_result_t result;
288
289         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
290                 udev = dev;
291         else
292                 udev = dev->bus->self;
293
294         parent = udev->subordinate;
295         pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
296
297         pci_lock_rescan_remove();
298         pci_dev_get(dev);
299         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
300                                          bus_list) {
301                 pci_stop_and_remove_bus_device(pdev);
302         }
303
304         result = reset_link(udev, service);
305
306         if ((service == PCIE_PORT_SERVICE_AER) &&
307             (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
308                 /*
309                  * If the error is reported by a bridge, we think this error
310                  * is related to the downstream link of the bridge, so we
311                  * do error recovery on all subordinates of the bridge instead
312                  * of the bridge and clear the error status of the bridge.
313                  */
314                 pci_aer_clear_fatal_status(dev);
315                 pci_aer_clear_device_status(dev);
316         }
317
318         if (result == PCI_ERS_RESULT_RECOVERED) {
319                 if (pcie_wait_for_link(udev, true))
320                         pci_rescan_bus(udev->bus);
321                 pci_info(dev, "Device recovery from fatal error successful\n");
322         } else {
323                 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
324                 pci_info(dev, "Device recovery from fatal error failed\n");
325         }
326
327         pci_dev_put(dev);
328         pci_unlock_rescan_remove();
329 }
330
331 /**
332  * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
333  * @dev: pointer to a pci_dev data structure of agent detecting an error
334  *
335  * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
336  * error detected message to all downstream drivers within a hierarchy in
337  * question and return the returned code.
338  */
339 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
340 {
341         pci_ers_result_t status;
342         enum pci_channel_state state;
343
344         state = pci_channel_io_normal;
345
346         status = broadcast_error_message(dev,
347                         state,
348                         "error_detected",
349                         report_error_detected);
350
351         if (status == PCI_ERS_RESULT_CAN_RECOVER)
352                 status = broadcast_error_message(dev,
353                                 state,
354                                 "mmio_enabled",
355                                 report_mmio_enabled);
356
357         if (status == PCI_ERS_RESULT_NEED_RESET) {
358                 /*
359                  * TODO: Should call platform-specific
360                  * functions to reset slot before calling
361                  * drivers' slot_reset callbacks?
362                  */
363                 status = broadcast_error_message(dev,
364                                 state,
365                                 "slot_reset",
366                                 report_slot_reset);
367         }
368
369         if (status != PCI_ERS_RESULT_RECOVERED)
370                 goto failed;
371
372         broadcast_error_message(dev,
373                                 state,
374                                 "resume",
375                                 report_resume);
376
377         pci_info(dev, "AER: Device recovery successful\n");
378         return;
379
380 failed:
381         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
382
383         /* TODO: Should kernel panic here? */
384         pci_info(dev, "AER: Device recovery failed\n");
385 }