From 41608b64b10b80fe00dd253cd8326ec8ad85930f Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 30 Aug 2021 16:13:27 -0700 Subject: PCI: hv: Fix sleep while in non-sleep context when removing child devices from the bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In hv_pci_bus_exit, the code is holding a spinlock while calling pci_destroy_slot(), which takes a mutex. This is not safe for spinlock. Fix this by moving the children to be deleted to a list on the stack, and removing them after spinlock is released. Fixes: 94d22763207a ("PCI: hv: Fix a race condition when removing the device") Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Dexuan Cui Cc: Lorenzo Pieralisi Cc: Rob Herring Cc: "Krzysztof WilczyƄski" Cc: Bjorn Helgaas Cc: Michael Kelley Reported-by: Dan Carpenter Link: https://lore.kernel.org/linux-hyperv/20210823152130.GA21501@kili/ Signed-off-by: Long Li Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/1630365207-20616-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index a53bd8728d0d..fc1a29acadbb 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3229,9 +3229,17 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) return 0; if (!keep_devs) { - /* Delete any children which might still exist. */ + struct list_head removed; + + /* Move all present children to the list on stack */ + INIT_LIST_HEAD(&removed); spin_lock_irqsave(&hbus->device_list_lock, flags); - list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) { + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) + list_move_tail(&hpdev->list_entry, &removed); + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + + /* Remove all children in the list */ + list_for_each_entry_safe(hpdev, tmp, &removed, list_entry) { list_del(&hpdev->list_entry); if (hpdev->pci_slot) pci_destroy_slot(hpdev->pci_slot); @@ -3239,7 +3247,6 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) put_pcichild(hpdev); put_pcichild(hpdev); } - spin_unlock_irqrestore(&hbus->device_list_lock, flags); } ret = hv_send_resources_released(hdev); -- cgit v1.2.3-70-g09d2 From a46044a92add6a400f4dada7b943b30221f7cc80 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 22 Sep 2021 15:55:12 +0200 Subject: s390/pci: fix zpci_zdev_put() on reserve Since commit 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") the reference count of a zpci_dev is incremented between pcibios_add_device() and pcibios_release_device() which was supposed to prevent the zpci_dev from being freed while the common PCI code has access to it. It was missed however that the handling of zPCI availability events assumed that once zpci_zdev_put() was called no later availability event would still see the device. With the previously mentioned commit however this assumption no longer holds and we must make sure that we only drop the initial long-lived reference the zPCI subsystem holds exactly once. Do so by introducing a zpci_device_reserved() function that handles when a device is reserved. Here we make sure the zpci_dev will not be considered for further events by removing it from the zpci_list. This also means that the device actually stays in the ZPCI_FN_STATE_RESERVED state between the time we know it has been reserved and the final reference going away. We thus need to consider it a real state instead of just a conceptual state after the removal. The final cleanup of PCI resources, removal from zbus, and destruction of the IOMMU stays in zpci_release_device() to make sure holders of the reference do see valid data until the release. Fixes: 2a671f77ee49 ("s390/pci: fix use after free of zpci_dev") Cc: stable@vger.kernel.org Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 2 ++ arch/s390/pci/pci.c | 45 +++++++++++++++++++++++++++++++++----- arch/s390/pci/pci_event.c | 4 ++-- drivers/pci/hotplug/s390_pci_hpc.c | 9 +------- 4 files changed, 45 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index e4803ec51110..6b3c366af78e 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -207,6 +207,8 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); +void zpci_device_reserved(struct zpci_dev *zdev); +bool zpci_is_device_configured(struct zpci_dev *zdev); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e7e6788d75a8..b833155ce838 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -92,7 +92,7 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } int pci_domain_nr(struct pci_bus *bus) @@ -751,6 +751,14 @@ error: return ERR_PTR(rc); } +bool zpci_is_device_configured(struct zpci_dev *zdev) +{ + enum zpci_state state = zdev->state; + + return state != ZPCI_FN_STATE_RESERVED && + state != ZPCI_FN_STATE_STANDBY; +} + /** * zpci_scan_configured_device() - Scan a freshly configured zpci_dev * @zdev: The zpci_dev to be configured @@ -822,6 +830,31 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) return 0; } +/** + * zpci_device_reserved() - Mark device as resverved + * @zdev: the zpci_dev that was reserved + * + * Handle the case that a given zPCI function was reserved by another system. + * After a call to this function the zpci_dev can not be found via + * get_zdev_by_fid() anymore but may still be accessible via existing + * references though it will not be functional anymore. + */ +void zpci_device_reserved(struct zpci_dev *zdev) +{ + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + /* + * Remove device from zpci_list as it is going away. This also + * makes sure we ignore subsequent zPCI events for this device. + */ + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zdev->state = ZPCI_FN_STATE_RESERVED; + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + zpci_zdev_put(zdev); +} + void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); @@ -843,6 +876,12 @@ void zpci_release_device(struct kref *kref) case ZPCI_FN_STATE_STANDBY: if (zdev->has_hp_slot) zpci_exit_slot(zdev); + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + fallthrough; + case ZPCI_FN_STATE_RESERVED: if (zdev->has_resources) zpci_cleanup_bus_resources(zdev); zpci_bus_device_unregister(zdev); @@ -851,10 +890,6 @@ void zpci_release_device(struct kref *kref) default: break; } - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index c856f80cb21b..5b8d647523f9 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -140,7 +140,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) /* The 0x0304 event may immediately reserve the device */ if (!clp_get_state(zdev->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } } break; @@ -151,7 +151,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); break; default: break; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 014868752cd4..dcefdb42ac46 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -62,14 +62,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); - switch (zdev->state) { - case ZPCI_FN_STATE_STANDBY: - *value = 0; - break; - default: - *value = 1; - break; - } + *value = zpci_is_device_configured(zdev) ? 1 : 0; return 0; } -- cgit v1.2.3-70-g09d2 From 3fb937f441c64af1eec60bfd3732f64001fcc534 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Oct 2021 15:58:10 +0200 Subject: PCI: ACPI: Check parent pointer in acpi_pci_find_companion() If acpi_pci_find_companion() is called for a device whose parent pointer is NULL, it will crash when attempting to get the ACPI companion of the parent due to a NULL pointer dereference in the ACPI_COMPANION() macro. This was not a problem before commit 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") that made pci_setup_device() call pci_set_acpi_fwnode() and so it allowed devices with NULL parent pointers to be passed to acpi_pci_find_companion() which is the case in pci_iov_add_virtfn(), for instance. Fix this issue by making acpi_pci_find_companion() check the device's parent pointer upfront and bail out if it is NULL. While pci_iov_add_virtfn() can be changed to set the device's parent pointer before calling pci_setup_device() for it, checking pointers against NULL before dereferencing them is prudent anyway and looking for ACPI companions of virtual functions isn't really useful. Fixes: 375553a93201 ("PCI: Setup ACPI fwnode early and at the same time with OF") Link: https://lore.kernel.org/linux-acpi/8e4bbd5c59de31db71f718556654c0aa077df03d.camel@linux.ibm.com/ Reported-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- drivers/pci/pci-acpi.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 0f40943a9a18..260a06fb78a6 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1249,6 +1249,9 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev) bool check_children; u64 addr; + if (!dev->parent) + return NULL; + down_read(&pci_acpi_companion_lookup_sem); adev = pci_acpi_find_companion_hook ? -- cgit v1.2.3-70-g09d2 From 2b94b6b79b7c24092a6169db9e83c4565be0db42 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 12 Oct 2021 15:15:56 +0800 Subject: PCI/MSI: Handle msi_populate_sysfs() errors correctly Previously, when msi_populate_sysfs() failed, we saved the error return value as dev->msi_irq_groups, which leads to a page fault when free_msi_irqs() calls msi_destroy_sysfs(). To prevent this, leave dev->msi_irq_groups alone when msi_populate_sysfs() fails. Found by the Hulk Robot when injecting a memory allocation fault in msi_populate_sysfs(): BUG: unable to handle page fault for address: fffffffffffffff4 ... Call Trace: msi_destroy_sysfs+0x30/0xa0 free_msi_irqs+0x11d/0x1b0 Fixes: 2f170814bdd2 ("genirq/msi: Move MSI sysfs handling from PCI to MSI core") Link: https://lore.kernel.org/r/20211012071556.939137-1-wanghai38@huawei.com Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Bjorn Helgaas Acked-by: Barry Song --- drivers/pci/msi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 0099a00af361..4b4792940e86 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -535,6 +535,7 @@ static int msi_verify_entries(struct pci_dev *dev) static int msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { + const struct attribute_group **groups; struct msi_desc *entry; int ret; @@ -558,12 +559,14 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, if (ret) goto err; - dev->msi_irq_groups = msi_populate_sysfs(&dev->dev); - if (IS_ERR(dev->msi_irq_groups)) { - ret = PTR_ERR(dev->msi_irq_groups); + groups = msi_populate_sysfs(&dev->dev); + if (IS_ERR(groups)) { + ret = PTR_ERR(groups); goto err; } + dev->msi_irq_groups = groups; + /* Set MSI enabled bits */ pci_intx_for_msi(dev, 0); pci_msi_set_enable(dev, 1); @@ -691,6 +694,7 @@ static void msix_mask_all(void __iomem *base, int tsize) static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { + const struct attribute_group **groups; void __iomem *base; int ret, tsize; u16 control; @@ -730,12 +734,14 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, msix_update_entries(dev, entries); - dev->msi_irq_groups = msi_populate_sysfs(&dev->dev); - if (IS_ERR(dev->msi_irq_groups)) { - ret = PTR_ERR(dev->msi_irq_groups); + groups = msi_populate_sysfs(&dev->dev); + if (IS_ERR(groups)) { + ret = PTR_ERR(groups); goto out_free; } + dev->msi_irq_groups = groups; + /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; -- cgit v1.2.3-70-g09d2