// SPDX-License-Identifier: GPL-2.0
/*
 * This file implements the error recovery as a core part of PCIe error
 * reporting. When a PCIe error is delivered, an error message is
 * collected and printed to the console, and then an error recovery
 * procedure is executed by following the PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */

13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
22 struct aer_broadcast_data {
23 enum pci_channel_state state;
24 enum pci_ers_result result;
27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28 enum pci_ers_result new)
30 if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31 return PCI_ERS_RESULT_NO_AER_DRIVER;
33 if (new == PCI_ERS_RESULT_NONE)
37 case PCI_ERS_RESULT_CAN_RECOVER:
38 case PCI_ERS_RESULT_RECOVERED:
41 case PCI_ERS_RESULT_DISCONNECT:
42 if (new == PCI_ERS_RESULT_NEED_RESET)
43 orig = PCI_ERS_RESULT_NEED_RESET;
52 static int report_error_detected(struct pci_dev *dev, void *data)
54 pci_ers_result_t vote;
55 const struct pci_error_handlers *err_handler;
56 struct aer_broadcast_data *result_data;
58 result_data = (struct aer_broadcast_data *) data;
60 device_lock(&dev->dev);
61 dev->error_state = result_data->state;
64 !dev->driver->err_handler ||
65 !dev->driver->err_handler->error_detected) {
66 if (result_data->state == pci_channel_io_frozen &&
67 dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
69 * In case of fatal recovery, if one of down-
70 * stream device has no driver. We might be
71 * unable to recover because a later insmod
72 * of a driver for this device is unaware of
75 pci_printk(KERN_DEBUG, dev, "device has %s\n",
77 "no AER-aware driver" : "no driver");
81 * If there's any device in the subtree that does not
82 * have an error_detected callback, returning
83 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
84 * the subsequent mmio_enabled/slot_reset/resume
85 * callbacks of "any" device in the subtree. All the
86 * devices in the subtree are left in the error state
90 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
91 vote = PCI_ERS_RESULT_NO_AER_DRIVER;
93 vote = PCI_ERS_RESULT_NONE;
95 err_handler = dev->driver->err_handler;
96 vote = err_handler->error_detected(dev, result_data->state);
97 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
100 result_data->result = merge_result(result_data->result, vote);
101 device_unlock(&dev->dev);
105 static int report_mmio_enabled(struct pci_dev *dev, void *data)
107 pci_ers_result_t vote;
108 const struct pci_error_handlers *err_handler;
109 struct aer_broadcast_data *result_data;
111 result_data = (struct aer_broadcast_data *) data;
113 device_lock(&dev->dev);
115 !dev->driver->err_handler ||
116 !dev->driver->err_handler->mmio_enabled)
119 err_handler = dev->driver->err_handler;
120 vote = err_handler->mmio_enabled(dev);
121 result_data->result = merge_result(result_data->result, vote);
123 device_unlock(&dev->dev);
127 static int report_slot_reset(struct pci_dev *dev, void *data)
129 pci_ers_result_t vote;
130 const struct pci_error_handlers *err_handler;
131 struct aer_broadcast_data *result_data;
133 result_data = (struct aer_broadcast_data *) data;
135 device_lock(&dev->dev);
137 !dev->driver->err_handler ||
138 !dev->driver->err_handler->slot_reset)
141 err_handler = dev->driver->err_handler;
142 vote = err_handler->slot_reset(dev);
143 result_data->result = merge_result(result_data->result, vote);
145 device_unlock(&dev->dev);
149 static int report_resume(struct pci_dev *dev, void *data)
151 const struct pci_error_handlers *err_handler;
153 device_lock(&dev->dev);
154 dev->error_state = pci_channel_io_normal;
157 !dev->driver->err_handler ||
158 !dev->driver->err_handler->resume)
161 err_handler = dev->driver->err_handler;
162 err_handler->resume(dev);
163 pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
165 device_unlock(&dev->dev);
170 * default_reset_link - default reset function
171 * @dev: pointer to pci_dev data structure
173 * Invoked when performing link reset on a Downstream Port or a
174 * Root Port with no aer driver.
176 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
178 pci_reset_bridge_secondary_bus(dev);
179 pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
180 return PCI_ERS_RESULT_RECOVERED;
183 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
185 struct pci_dev *udev;
186 pci_ers_result_t status;
187 struct pcie_port_service_driver *driver = NULL;
189 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
190 /* Reset this port for all subordinates */
193 /* Reset the upstream component (likely downstream port) */
194 udev = dev->bus->self;
197 /* Use the aer driver of the component firstly */
198 driver = pcie_port_find_service(udev, service);
200 if (driver && driver->reset_link) {
201 status = driver->reset_link(udev);
202 } else if (udev->has_secondary_link) {
203 status = default_reset_link(udev);
205 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
207 return PCI_ERS_RESULT_DISCONNECT;
210 if (status != PCI_ERS_RESULT_RECOVERED) {
211 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
213 return PCI_ERS_RESULT_DISCONNECT;
220 * broadcast_error_message - handle message broadcast to downstream drivers
221 * @dev: pointer to from where in a hierarchy message is broadcasted down
222 * @state: error state
223 * @error_mesg: message to print
224 * @cb: callback to be broadcasted
226 * Invoked during error recovery process. Once being invoked, the content
227 * of error severity will be broadcasted to all downstream drivers in a
228 * hierarchy in question.
230 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
231 enum pci_channel_state state,
233 int (*cb)(struct pci_dev *, void *))
235 struct aer_broadcast_data result_data;
237 pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
238 result_data.state = state;
239 if (cb == report_error_detected)
240 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
242 result_data.result = PCI_ERS_RESULT_RECOVERED;
244 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
246 * If the error is reported by a bridge, we think this error
247 * is related to the downstream link of the bridge, so we
248 * do error recovery on all subordinates of the bridge instead
249 * of the bridge and clear the error status of the bridge.
251 if (cb == report_error_detected)
252 dev->error_state = state;
253 pci_walk_bus(dev->subordinate, cb, &result_data);
254 if (cb == report_resume) {
255 pci_cleanup_aer_uncorrect_error_status(dev);
256 dev->error_state = pci_channel_io_normal;
260 * If the error is reported by an end point, we think this
261 * error is related to the upstream link of the end point.
263 if (state == pci_channel_io_normal)
265 * the error is non fatal so the bus is ok, just invoke
266 * the callback for the function that logged the error.
268 cb(dev, &result_data);
270 pci_walk_bus(dev->bus, cb, &result_data);
273 return result_data.result;
277 * pcie_do_fatal_recovery - handle fatal error recovery process
278 * @dev: pointer to a pci_dev data structure of agent detecting an error
280 * Invoked when an error is fatal. Once being invoked, removes the devices
281 * beneath this AER agent, followed by reset link e.g. secondary bus reset
282 * followed by re-enumeration of devices.
284 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
286 struct pci_dev *udev;
287 struct pci_bus *parent;
288 struct pci_dev *pdev, *temp;
289 pci_ers_result_t result;
291 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
294 udev = dev->bus->self;
296 parent = udev->subordinate;
297 pci_lock_rescan_remove();
299 list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
302 pci_dev_set_disconnected(pdev, NULL);
303 if (pci_has_subordinate(pdev))
304 pci_walk_bus(pdev->subordinate,
305 pci_dev_set_disconnected, NULL);
306 pci_stop_and_remove_bus_device(pdev);
310 result = reset_link(udev, service);
312 if ((service == PCIE_PORT_SERVICE_AER) &&
313 (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
315 * If the error is reported by a bridge, we think this error
316 * is related to the downstream link of the bridge, so we
317 * do error recovery on all subordinates of the bridge instead
318 * of the bridge and clear the error status of the bridge.
320 pci_cleanup_aer_uncorrect_error_status(dev);
323 if (result == PCI_ERS_RESULT_RECOVERED) {
324 if (pcie_wait_for_link(udev, true))
325 pci_rescan_bus(udev->bus);
326 pci_info(dev, "Device recovery from fatal error successful\n");
328 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
329 pci_info(dev, "Device recovery from fatal error failed\n");
333 pci_unlock_rescan_remove();
337 * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
338 * @dev: pointer to a pci_dev data structure of agent detecting an error
340 * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
341 * error detected message to all downstream drivers within a hierarchy in
342 * question and return the returned code.
344 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
346 pci_ers_result_t status;
347 enum pci_channel_state state;
349 state = pci_channel_io_normal;
351 status = broadcast_error_message(dev,
354 report_error_detected);
356 if (status == PCI_ERS_RESULT_CAN_RECOVER)
357 status = broadcast_error_message(dev,
360 report_mmio_enabled);
362 if (status == PCI_ERS_RESULT_NEED_RESET) {
364 * TODO: Should call platform-specific
365 * functions to reset slot before calling
366 * drivers' slot_reset callbacks?
368 status = broadcast_error_message(dev,
374 if (status != PCI_ERS_RESULT_RECOVERED)
377 broadcast_error_message(dev,
382 pci_info(dev, "AER: Device recovery successful\n");
386 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
388 /* TODO: Should kernel panic here? */
389 pci_info(dev, "AER: Device recovery failed\n");