Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

[linux.git] / arch / powerpc / kernel / eeh_driver.c
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c

index 0c0b66fc5bfb32f6c61e8fa454fbfe1a467db89b..b8a329f048141c28940c49018ba67332c01096b6 100644 (file)
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
  
         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                 return NULL;
+
+       device_lock(&dev->dev);
         dev->error_state = pci_channel_io_frozen;
  
         driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
  
         eeh_disable_irq(dev);
  
         if (!driver->err_handler ||
-           !driver->err_handler->error_detected) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           !driver->err_handler->error_detected)
+               goto out;
  
         rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
  
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
         if (*res == PCI_ERS_RESULT_NONE) *res = rc;
  
         edev->in_error = true;
-       eeh_pcid_put(dev);
         pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+       eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
         return NULL;
  }
  
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                 return NULL;
  
+       device_lock(&dev->dev);
         driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
  
         if (!driver->err_handler ||
             !driver->err_handler->mmio_enabled ||
-           (edev->mode & EEH_DEV_NO_HANDLER)) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           (edev->mode & EEH_DEV_NO_HANDLER))
+               goto out;
  
         rc = driver->err_handler->mmio_enabled(dev);
  
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
         if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
         if (*res == PCI_ERS_RESULT_NONE) *res = rc;
  
+out:
         eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
         return NULL;
  }
  
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
  
         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                 return NULL;
+
+       device_lock(&dev->dev);
         dev->error_state = pci_channel_io_normal;
  
         driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
  
         eeh_enable_irq(dev);
  
         if (!driver->err_handler ||
             !driver->err_handler->slot_reset ||
             (edev->mode & EEH_DEV_NO_HANDLER) ||
-           (!edev->in_error)) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           (!edev->in_error))
+               goto out;
  
         rc = driver->err_handler->slot_reset(dev);
         if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
         if (*res == PCI_ERS_RESULT_DISCONNECT &&
              rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
  
+out:
         eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
         return NULL;
  }
  
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
  
         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                 return NULL;
+
+       device_lock(&dev->dev);
         dev->error_state = pci_channel_io_normal;
  
         driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
  
         was_in_error = edev->in_error;
         edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
             !driver->err_handler->resume ||
             (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
                 edev->mode &= ~EEH_DEV_NO_HANDLER;
-               eeh_pcid_put(dev);
-               return NULL;
+               goto out;
         }
  
         driver->err_handler->resume(dev);
  
-       eeh_pcid_put(dev);
         pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+       eeh_pcid_put(dev);
  #ifdef CONFIG_PCI_IOV
         if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
                 eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
  #endif
+out_no_dev:
+       device_unlock(&dev->dev);
         return NULL;
  }
  
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
  
         if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                 return NULL;
+
+       device_lock(&dev->dev);
         dev->error_state = pci_channel_io_perm_failure;
  
         driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
  
         eeh_disable_irq(dev);
  
         if (!driver->err_handler ||
-           !driver->err_handler->error_detected) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           !driver->err_handler->error_detected)
+               goto out;
  
         driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
  
-       eeh_pcid_put(dev);
         pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+       eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
         return NULL;
  }
  
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
  
  /**
   * eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
   * @pe: EEH PE
   * @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
   *
   * This routine must be called to do reset on the indicated PE.
   * During the reset, udev might be invoked because those affected
   * PCI devices will be removed and then added.
   */
  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
-                               struct eeh_rmv_data *rmv_data)
+                           struct eeh_rmv_data *rmv_data,
+                           bool driver_eeh_aware)
  {
-       struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
         time64_t tstamp;
         int cnt, rc;
         struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
          * into pci_hp_add_devices().
          */
         eeh_pe_state_mark(pe, EEH_PE_KEEP);
-       if (bus) {
-               if (pe->type & EEH_PE_VF) {
-                       eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-               } else {
-                       pci_lock_rescan_remove();
-                       pci_hp_remove_devices(bus);
-                       pci_unlock_rescan_remove();
-               }
-       } else if (frozen_bus) {
+       if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
                 eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+       } else {
+               pci_lock_rescan_remove();
+               pci_hp_remove_devices(bus);
+               pci_unlock_rescan_remove();
         }
  
         /*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
          * the device up before the scripts have taken it down,
          * potentially weird things happen.
          */
-       if (bus) {
-               pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+       if (!driver_eeh_aware || rmv_data->removed) {
+               pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+                       (driver_eeh_aware ? "partial" : "complete"));
                 ssleep(5);
  
                 /*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
                 if (pe->type & EEH_PE_VF) {
                         eeh_add_virt_device(edev, NULL);
                 } else {
-                       eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+                       if (!driver_eeh_aware)
+                               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
                         pci_hp_add_devices(bus);
                 }
-       } else if (frozen_bus && rmv_data->removed) {
-               pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
-               ssleep(5);
-
-               edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
-               eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-               if (pe->type & EEH_PE_VF)
-                       eeh_add_virt_device(edev, NULL);
-               else
-                       pci_hp_add_devices(frozen_bus);
         }
         eeh_pe_state_clear(pe, EEH_PE_KEEP);
  
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
  
  /**
   * eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
   *
   * Attempts to recover the given PE.  If recovery fails or the PE has failed
   * too many times, remove the PE.
   *
- * Returns true if @pe should no longer be used, else false.
+ * When the PHB detects address or data parity errors on a particular
+ * PCI slot, the associated PE will be frozen. Besides, DMAs to wild
+ * addresses (which usually happen due to bugs in device drivers or in
+ * PCI adapter firmware) can cause an EEH error. #SERR, #PERR or other
+ * misc PCI-related errors can also trigger EEH errors.
+ *
+ * The recovery process consists of unplugging the device driver (which
+ * generates hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which causes a second set of hotplug events to go out to
+ * userspace).
   */
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
  {
-       struct pci_bus *frozen_bus;
+       struct pci_bus *bus;
         struct eeh_dev *edev, *tmp;
         int rc = 0;
         enum pci_ers_result result = PCI_ERS_RESULT_NONE;
         struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
  
-       frozen_bus = eeh_pe_bus_get(pe);
-       if (!frozen_bus) {
+       bus = eeh_pe_bus_get(pe);
+       if (!bus) {
                 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
                         __func__, pe->phb->global_number, pe->addr);
-               return false;
+               return;
         }
  
+       eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
         eeh_pe_update_time_stamp(pe);
         pe->freeze_count++;
         if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
          */
         if (result == PCI_ERS_RESULT_NONE) {
                 pr_info("EEH: Reset with hotplug activity\n");
-               rc = eeh_reset_device(pe, frozen_bus, NULL);
+               rc = eeh_reset_device(pe, bus, NULL, false);
                 if (rc) {
                         pr_warn("%s: Unable to reset, err=%d\n",
                                 __func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
         /* If any device called out for a reset, then reset the slot */
         if (result == PCI_ERS_RESULT_NEED_RESET) {
                 pr_info("EEH: Reset without hotplug activity\n");
-               rc = eeh_reset_device(pe, NULL, &rmv_data);
+               rc = eeh_reset_device(pe, bus, &rmv_data, true);
                 if (rc) {
                         pr_warn("%s: Cannot reset, err=%d\n",
                                 __func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
         pr_info("EEH: Notify device driver to resume\n");
         eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
  
-       return false;
+       goto final;
  
  hard_fail:
         /*
@@ -916,23 +936,21 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
          * all removed devices correctly to avoid access
          * the their PCI config any more.
          */
-       if (frozen_bus) {
-               if (pe->type & EEH_PE_VF) {
-                       eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-               } else {
-                       eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
-                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
-                       pci_lock_rescan_remove();
-                       pci_hp_remove_devices(frozen_bus);
-                       pci_unlock_rescan_remove();
+       if (pe->type & EEH_PE_VF) {
+               eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+       } else {
+               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
  
-                       /* The passed PE should no longer be used */
-                       return true;
-               }
+               pci_lock_rescan_remove();
+               pci_hp_remove_devices(bus);
+               pci_unlock_rescan_remove();
+               /* The passed PE should no longer be used */
+               return;
         }
-       return false;
+final:
+       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
  }
  
  /**
@@ -942,7 +960,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
   * specific PE.  Iterates through possible failures and handles them as
   * necessary.
   */
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
  {
         struct eeh_pe *pe, *phb_pe;
         struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
                  */
                 if (rc == EEH_NEXT_ERR_FROZEN_PE ||
                     rc == EEH_NEXT_ERR_FENCED_PHB) {
-                       /*
-                        * eeh_handle_normal_event() can make the PE stale if it
-                        * determines that the PE cannot possibly be recovered.
-                        * Don't modify the PE state if that's the case.
-                        */
-                       if (eeh_handle_normal_event(pe))
-                               continue;
-
-                       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+                       eeh_handle_normal_event(pe);
                 } else {
                         pci_lock_rescan_remove();
                         list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
                         break;
         } while (rc != EEH_NEXT_ERR_NONE);
  }
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
-       if (pe)
-               eeh_handle_normal_event(pe);
-       else
-               eeh_handle_special_event();
-}