From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa2..d70394f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init - initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.1