]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/pci/pcie/err.c
PCI/AER: Work around use-after-free in pcie_do_fatal_recovery()
[linux.git] / drivers / pci / pcie / err.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *      Tom Long Nguyen (tom.l.nguyen@intel.com)
10  *      Zhang Yanmin (yanmin.zhang@intel.com)
11  */
12
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
19 #include "portdrv.h"
20 #include "../pci.h"
21
22 struct aer_broadcast_data {
23         enum pci_channel_state state;
24         enum pci_ers_result result;
25 };
26
27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28                                   enum pci_ers_result new)
29 {
30         if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31                 return PCI_ERS_RESULT_NO_AER_DRIVER;
32
33         if (new == PCI_ERS_RESULT_NONE)
34                 return orig;
35
36         switch (orig) {
37         case PCI_ERS_RESULT_CAN_RECOVER:
38         case PCI_ERS_RESULT_RECOVERED:
39                 orig = new;
40                 break;
41         case PCI_ERS_RESULT_DISCONNECT:
42                 if (new == PCI_ERS_RESULT_NEED_RESET)
43                         orig = PCI_ERS_RESULT_NEED_RESET;
44                 break;
45         default:
46                 break;
47         }
48
49         return orig;
50 }
51
52 static int report_error_detected(struct pci_dev *dev, void *data)
53 {
54         pci_ers_result_t vote;
55         const struct pci_error_handlers *err_handler;
56         struct aer_broadcast_data *result_data;
57
58         result_data = (struct aer_broadcast_data *) data;
59
60         device_lock(&dev->dev);
61         dev->error_state = result_data->state;
62
63         if (!dev->driver ||
64                 !dev->driver->err_handler ||
65                 !dev->driver->err_handler->error_detected) {
66                 if (result_data->state == pci_channel_io_frozen &&
67                         dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
68                         /*
69                          * In case of fatal recovery, if one of down-
70                          * stream device has no driver. We might be
71                          * unable to recover because a later insmod
72                          * of a driver for this device is unaware of
73                          * its hw state.
74                          */
75                         pci_printk(KERN_DEBUG, dev, "device has %s\n",
76                                    dev->driver ?
77                                    "no AER-aware driver" : "no driver");
78                 }
79
80                 /*
81                  * If there's any device in the subtree that does not
82                  * have an error_detected callback, returning
83                  * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
84                  * the subsequent mmio_enabled/slot_reset/resume
85                  * callbacks of "any" device in the subtree. All the
86                  * devices in the subtree are left in the error state
87                  * without recovery.
88                  */
89
90                 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
91                         vote = PCI_ERS_RESULT_NO_AER_DRIVER;
92                 else
93                         vote = PCI_ERS_RESULT_NONE;
94         } else {
95                 err_handler = dev->driver->err_handler;
96                 vote = err_handler->error_detected(dev, result_data->state);
97                 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
98         }
99
100         result_data->result = merge_result(result_data->result, vote);
101         device_unlock(&dev->dev);
102         return 0;
103 }
104
105 static int report_mmio_enabled(struct pci_dev *dev, void *data)
106 {
107         pci_ers_result_t vote;
108         const struct pci_error_handlers *err_handler;
109         struct aer_broadcast_data *result_data;
110
111         result_data = (struct aer_broadcast_data *) data;
112
113         device_lock(&dev->dev);
114         if (!dev->driver ||
115                 !dev->driver->err_handler ||
116                 !dev->driver->err_handler->mmio_enabled)
117                 goto out;
118
119         err_handler = dev->driver->err_handler;
120         vote = err_handler->mmio_enabled(dev);
121         result_data->result = merge_result(result_data->result, vote);
122 out:
123         device_unlock(&dev->dev);
124         return 0;
125 }
126
127 static int report_slot_reset(struct pci_dev *dev, void *data)
128 {
129         pci_ers_result_t vote;
130         const struct pci_error_handlers *err_handler;
131         struct aer_broadcast_data *result_data;
132
133         result_data = (struct aer_broadcast_data *) data;
134
135         device_lock(&dev->dev);
136         if (!dev->driver ||
137                 !dev->driver->err_handler ||
138                 !dev->driver->err_handler->slot_reset)
139                 goto out;
140
141         err_handler = dev->driver->err_handler;
142         vote = err_handler->slot_reset(dev);
143         result_data->result = merge_result(result_data->result, vote);
144 out:
145         device_unlock(&dev->dev);
146         return 0;
147 }
148
149 static int report_resume(struct pci_dev *dev, void *data)
150 {
151         const struct pci_error_handlers *err_handler;
152
153         device_lock(&dev->dev);
154         dev->error_state = pci_channel_io_normal;
155
156         if (!dev->driver ||
157                 !dev->driver->err_handler ||
158                 !dev->driver->err_handler->resume)
159                 goto out;
160
161         err_handler = dev->driver->err_handler;
162         err_handler->resume(dev);
163         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
164 out:
165         device_unlock(&dev->dev);
166         return 0;
167 }
168
169 /**
170  * default_reset_link - default reset function
171  * @dev: pointer to pci_dev data structure
172  *
173  * Invoked when performing link reset on a Downstream Port or a
174  * Root Port with no aer driver.
175  */
176 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
177 {
178         pci_reset_bridge_secondary_bus(dev);
179         pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
180         return PCI_ERS_RESULT_RECOVERED;
181 }
182
183 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
184 {
185         struct pci_dev *udev;
186         pci_ers_result_t status;
187         struct pcie_port_service_driver *driver = NULL;
188
189         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
190                 /* Reset this port for all subordinates */
191                 udev = dev;
192         } else {
193                 /* Reset the upstream component (likely downstream port) */
194                 udev = dev->bus->self;
195         }
196
197         /* Use the aer driver of the component firstly */
198         driver = pcie_port_find_service(udev, service);
199
200         if (driver && driver->reset_link) {
201                 status = driver->reset_link(udev);
202         } else if (udev->has_secondary_link) {
203                 status = default_reset_link(udev);
204         } else {
205                 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
206                         pci_name(udev));
207                 return PCI_ERS_RESULT_DISCONNECT;
208         }
209
210         if (status != PCI_ERS_RESULT_RECOVERED) {
211                 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
212                         pci_name(udev));
213                 return PCI_ERS_RESULT_DISCONNECT;
214         }
215
216         return status;
217 }
218
219 /**
220  * broadcast_error_message - handle message broadcast to downstream drivers
221  * @dev: pointer to from where in a hierarchy message is broadcasted down
222  * @state: error state
223  * @error_mesg: message to print
224  * @cb: callback to be broadcasted
225  *
226  * Invoked during error recovery process. Once being invoked, the content
227  * of error severity will be broadcasted to all downstream drivers in a
228  * hierarchy in question.
229  */
230 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
231         enum pci_channel_state state,
232         char *error_mesg,
233         int (*cb)(struct pci_dev *, void *))
234 {
235         struct aer_broadcast_data result_data;
236
237         pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
238         result_data.state = state;
239         if (cb == report_error_detected)
240                 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
241         else
242                 result_data.result = PCI_ERS_RESULT_RECOVERED;
243
244         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
245                 /*
246                  * If the error is reported by a bridge, we think this error
247                  * is related to the downstream link of the bridge, so we
248                  * do error recovery on all subordinates of the bridge instead
249                  * of the bridge and clear the error status of the bridge.
250                  */
251                 if (cb == report_error_detected)
252                         dev->error_state = state;
253                 pci_walk_bus(dev->subordinate, cb, &result_data);
254                 if (cb == report_resume) {
255                         pci_cleanup_aer_uncorrect_error_status(dev);
256                         dev->error_state = pci_channel_io_normal;
257                 }
258         } else {
259                 /*
260                  * If the error is reported by an end point, we think this
261                  * error is related to the upstream link of the end point.
262                  */
263                 if (state == pci_channel_io_normal)
264                         /*
265                          * the error is non fatal so the bus is ok, just invoke
266                          * the callback for the function that logged the error.
267                          */
268                         cb(dev, &result_data);
269                 else
270                         pci_walk_bus(dev->bus, cb, &result_data);
271         }
272
273         return result_data.result;
274 }
275
276 /**
277  * pcie_do_fatal_recovery - handle fatal error recovery process
278  * @dev: pointer to a pci_dev data structure of agent detecting an error
279  *
280  * Invoked when an error is fatal. Once being invoked, removes the devices
281  * beneath this AER agent, followed by reset link e.g. secondary bus reset
282  * followed by re-enumeration of devices.
283  */
284 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
285 {
286         struct pci_dev *udev;
287         struct pci_bus *parent;
288         struct pci_dev *pdev, *temp;
289         pci_ers_result_t result;
290
291         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
292                 udev = dev;
293         else
294                 udev = dev->bus->self;
295
296         parent = udev->subordinate;
297         pci_lock_rescan_remove();
298         pci_dev_get(dev);
299         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
300                                          bus_list) {
301                 pci_dev_get(pdev);
302                 pci_dev_set_disconnected(pdev, NULL);
303                 if (pci_has_subordinate(pdev))
304                         pci_walk_bus(pdev->subordinate,
305                                      pci_dev_set_disconnected, NULL);
306                 pci_stop_and_remove_bus_device(pdev);
307                 pci_dev_put(pdev);
308         }
309
310         result = reset_link(udev, service);
311
312         if ((service == PCIE_PORT_SERVICE_AER) &&
313             (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
314                 /*
315                  * If the error is reported by a bridge, we think this error
316                  * is related to the downstream link of the bridge, so we
317                  * do error recovery on all subordinates of the bridge instead
318                  * of the bridge and clear the error status of the bridge.
319                  */
320                 pci_cleanup_aer_uncorrect_error_status(dev);
321         }
322
323         if (result == PCI_ERS_RESULT_RECOVERED) {
324                 if (pcie_wait_for_link(udev, true))
325                         pci_rescan_bus(udev->bus);
326                 pci_info(dev, "Device recovery from fatal error successful\n");
327         } else {
328                 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
329                 pci_info(dev, "Device recovery from fatal error failed\n");
330         }
331
332         pci_dev_put(dev);
333         pci_unlock_rescan_remove();
334 }
335
336 /**
337  * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
338  * @dev: pointer to a pci_dev data structure of agent detecting an error
339  *
340  * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
341  * error detected message to all downstream drivers within a hierarchy in
342  * question and return the returned code.
343  */
344 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
345 {
346         pci_ers_result_t status;
347         enum pci_channel_state state;
348
349         state = pci_channel_io_normal;
350
351         status = broadcast_error_message(dev,
352                         state,
353                         "error_detected",
354                         report_error_detected);
355
356         if (status == PCI_ERS_RESULT_CAN_RECOVER)
357                 status = broadcast_error_message(dev,
358                                 state,
359                                 "mmio_enabled",
360                                 report_mmio_enabled);
361
362         if (status == PCI_ERS_RESULT_NEED_RESET) {
363                 /*
364                  * TODO: Should call platform-specific
365                  * functions to reset slot before calling
366                  * drivers' slot_reset callbacks?
367                  */
368                 status = broadcast_error_message(dev,
369                                 state,
370                                 "slot_reset",
371                                 report_slot_reset);
372         }
373
374         if (status != PCI_ERS_RESULT_RECOVERED)
375                 goto failed;
376
377         broadcast_error_message(dev,
378                                 state,
379                                 "resume",
380                                 report_resume);
381
382         pci_info(dev, "AER: Device recovery successful\n");
383         return;
384
385 failed:
386         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
387
388         /* TODO: Should kernel panic here? */
389         pci_info(dev, "AER: Device recovery failed\n");
390 }