From: Linus Torvalds
Date: Thu, 19 May 2011 23:14:58 +0000 (-0700)
Subject: Merge branches 'stable/irq', 'stable/p2m.bugfixes', 'stable/e820.bugfixes' and 'stabl...
X-Git-Tag: v3.0-rc1~413
X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=commitdiff_plain;h=e33ab8f275cf6e0e0bf6c9c44149de46222b36cc;p=linux.git

Merge branches 'stable/irq', 'stable/p2m.bugfixes', 'stable/e820.bugfixes' and 'stable/mmu.bugfixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

* 'stable/irq' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen: do not clear and mask evtchns in __xen_evtchn_do_upcall

* 'stable/p2m.bugfixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/p2m: Create entries in the P2M_MFN trees's to track 1-1 mappings

* 'stable/e820.bugfixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/setup: Fix for incorrect xen_extra_mem_start initialization under 32-bit
  xen/setup: Ignore E820_UNUSABLE when setting 1-1 mappings.

* 'stable/mmu.bugfixes' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen mmu: fix a race window causing leave_mm BUG()
---

e33ab8f275cf6e0e0bf6c9c44149de46222b36cc
diff --cc arch/x86/xen/mmu.c
index b5f776f60b1b,55c965b38c27,c82df6c9c0f0,55c965b38c27,4fd7387222bf..02d752460371
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@@@@@ -546,41 -546,41 -546,41 -546,41 -532,6 +546,41 @@@@@@ pte_t xen_make_pte(pteval_t pte
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+	phys_addr_t addr = (pte & PTE_PFN_MASK);
+	phys_addr_t other_addr;
+	bool io_page = false;
+	pte_t _pte;
+
+	if (pte & _PAGE_IOMAP)
+		io_page = true;
+
+	_pte = xen_make_pte(pte);
+
+	if (!addr)
+		return _pte;
+
+	if (io_page &&
+	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-		WARN(addr != other_addr,
+ +		WARN_ONCE(addr != other_addr,
+			"0x%lx is using VM_IO, but it is 0x%lx!\n",
+			(unsigned long)addr, (unsigned long)other_addr);
+	} else {
+		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+		other_addr = (_pte.pte & PTE_PFN_MASK);
-		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ +		WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
+			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
+			(unsigned long)addr);
+	}
+
+	return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
@@@@@@ -1487,30 -1586,38 -1473,26 -1586,38 -1426,24 +1487,30 @@@@@@ static void xen_pgd_free(struct mm_stru
 #endif
 }
 
- -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
- -{
- -	unsigned long pfn = pte_pfn(pte);
- -
 #ifdef CONFIG_X86_32
- - static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
++++static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+ +{
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
 		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) |
 			       ~_PAGE_RW) & pte_val_ma(pte));
- -#endif
+ +
+ +	return pte;
+ +}
+ +#else /* CONFIG_X86_64 */
- - static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
++++static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+ +{
+ +	unsigned long pfn = pte_pfn(pte);
- -
- -	/*
- -	 * A bit of optimization. We do not need to call the workaround
- -	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
- - * That is b/c the pagetable at that point are just being populated - - * with empty values and we can save some cycles by not calling - - * the 'memblock' code.*/ - - if (pfn) - - mark_rw_past_pgt(); /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot, make sure it is RO. + * early_ioremap fixmap slot as a freshly allocated page, make sure + * it is RO. */ - if (!is_early_ioremap_ptep(ptep) && - pfn >= e820_table_start && pfn < e820_table_end) + if (((!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_end)) || + + pfn >= pgt_buf_start && pfn < pgt_buf_top)) || + (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) pte = pte_wrprotect(pte); return pte; @@@@@@ -2009,11 -2116,13 -1990,11 -2116,13 -1940,8 +2009,11 @@@@@@ void __init xen_ident_map_ISA(void xen_flush_tlb(); } ----static __init void xen_post_allocator_init(void) ++++static void __init xen_post_allocator_init(void) { - - mark_rw_past_pgt(); - - +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; diff --cc arch/x86/xen/p2m.c index c851397e657c,141eb0de8b06,a01e6532b46a,141eb0de8b06,fd12d7ce7ff9..58efeb9d5440 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@@@@@ -497,82 -497,82 -497,108 -497,82 -354,6 +497,108 @@@@@@ static bool alloc_p2m(unsigned long pfn return true; } +static bool __init __early_alloc_p2m(unsigned long pfn) +{ + unsigned topidx, mididx, idx; + + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); + idx = p2m_index(pfn); + + /* Pfff.. No boundary cross-over, lets get out. */ + if (!idx) + return false; + + WARN(p2m_top[topidx][mididx] == p2m_identity, + "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", + topidx, mididx); + + /* + * Could be done by xen_build_dynamic_phys_to_machine.. 
+ */ + if (p2m_top[topidx][mididx] != p2m_missing) + return false; + + /* Boundary cross-over for the edges: */ + if (idx) { + unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); ++ ++ unsigned long *mid_mfn_p; + + p2m_init(p2m); + + p2m_top[topidx][mididx] = p2m; + ++ ++ /* For save/restore we need to MFN of the P2M saved */ ++ ++ ++ ++ mid_mfn_p = p2m_top_mfn_p[topidx]; ++ ++ WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), ++ ++ "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", ++ ++ topidx, mididx); ++ ++ mid_mfn_p[mididx] = virt_to_mfn(p2m); ++ ++ + } + return idx != 0; +} +unsigned long __init set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e) +{ + unsigned long pfn; + + if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) + return 0; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn_e - pfn_s; + + if (pfn_s > pfn_e) + return 0; + + for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); + pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); + pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) + { + unsigned topidx = p2m_top_index(pfn); -- - if (p2m_top[topidx] == p2m_mid_missing) { -- - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); ++ ++ unsigned long *mid_mfn_p; ++ ++ unsigned long **mid; ++ ++ ++ ++ mid = p2m_top[topidx]; ++ ++ mid_mfn_p = p2m_top_mfn_p[topidx]; ++ ++ if (mid == p2m_mid_missing) { ++ ++ mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_mid_init(mid); + + p2m_top[topidx] = mid; ++ ++ ++ ++ BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); ++ ++ } ++ ++ /* And the save/restore P2M tables.. */ ++ ++ if (mid_mfn_p == p2m_mid_missing_mfn) { ++ ++ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); ++ ++ p2m_mid_mfn_init(mid_mfn_p); ++ ++ ++ ++ p2m_top_mfn_p[topidx] = mid_mfn_p; ++ ++ p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); ++ ++ /* Note: we don't set mid_mfn_p[midix] here, ++ ++ * look in __early_alloc_p2m */ + } + } + + __early_alloc_p2m(pfn_s); + __early_alloc_p2m(pfn_e); + + for (pfn = pfn_s; pfn < pfn_e; pfn++) + if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) + break; + + if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), + "Identity mapping failed. 
We are %ld short of 1-1 mappings!\n", + (pfn_e - pfn_s) - (pfn - pfn_s))) + printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); + + return pfn - pfn_s; +} + /* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { @@@@@@ -670,12 -671,13 -697,13 -671,13 -438,11 +696,12 @@@@@@ int m2p_add_override(unsigned long mfn page->private = mfn; page->index = pfn_to_mfn(pfn); - __set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); - if (!PageHighMem(page)) + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + return -ENOMEM; + --- if (!PageHighMem(page)) ++++ if (clear_pte && !PageHighMem(page)) /* Just zap old mapping for now */ pte_clear(&init_mm, address, ptep); ---- spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); @@@@@@ -709,9 -711,9 -737,9 -711,9 -476,9 +735,9 @@@@@@ int m2p_remove_override(struct page *pa spin_lock_irqsave(&m2p_override_lock, flags); list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - __set_phys_to_machine(pfn, page->index); + set_phys_to_machine(pfn, page->index); ---- if (!PageHighMem(page)) ++++ if (clear_pte && !PageHighMem(page)) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); /* No tlb flush necessary because the caller already diff --cc arch/x86/xen/setup.c index d3663df2f967,90bac0aac3a5,fa0269a99377,ca6297bd4e3c,a8a66a50d446..be1a464f6d66 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@@@@@ -50,10 -50,10 -50,10 -50,10 -50,8 +50,10 @@@@@@ phys_addr_t xen_extra_mem_start, xen_ex */ #define EXTRA_MEM_RATIO (10) ----static __init void xen_add_extra_mem(unsigned long pages) ++++static void __init xen_add_extra_mem(unsigned long pages) { + unsigned long pfn; + u64 size = (u64)pages * PAGE_SIZE; u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; @@@@@@ -143,48 -143,48 -143,48 -143,48 -138,6 +143,48 @@@@@@ static unsigned long __init xen_return_ return released; } +static unsigned long __init xen_set_identity(const struct e820entry *list, + ssize_t map_size) +{ + phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; + phys_addr_t start_pci = last; + const struct e820entry *entry; + unsigned long identity = 0; + int i; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + phys_addr_t start = entry->addr; + phys_addr_t end = start + entry->size; + + if (start < last) + start = last; + + if (end <= start) + continue; + + /* Skip over the 1MB region. */ + if (last > end) + continue; + --- if (entry->type == E820_RAM) { +++ + if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { + if (start > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(start)); + + /* Without saving 'last' we would gooble RAM too + * at the end of the loop. */ + last = end; + start_pci = end; + continue; + } + start_pci = min(start, start_pci); + last = end; + } + if (last > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(last)); + return identity; +} /** * machine_specific_memory_setup - Hook for machine specific memory setup. 
**/ @@@@@@ -225,9 -225,9 -225,9 -225,13 -176,8 +225,13 @@@@@@ char * __init xen_memory_setup(void } BUG_ON(rc); + memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; +++ +#ifdef CONFIG_X86_32 ++ xen_extra_mem_start = mem_end; +++ +#else + + xen_extra_mem_start = max((1ULL << 32), mem_end); +++ +#endif for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end; diff --cc drivers/xen/events.c index 35e02a10110b,0ae1d4d7e18c,036343ba204e,33167b43ac7e,74681478100a..3ff822b48145 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@@@@@ -119,84 -118,84 -118,82 -118,82 -127,37 +119,86 @@@@@@ static DEFINE_PER_CPU(unsigned long [NR static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; + +++static void enable_dynirq(struct irq_data *data); + +++static void disable_dynirq(struct irq_data *data); + +/* Get info for IRQ */ +static struct irq_info *info_for_irq(unsigned irq) +{ + return irq_get_handler_data(irq); +} -/* Constructor for packed IRQ information. */ -static struct irq_info mk_unbound_info(void) +/* Constructors for packed IRQ information. */ +static void xen_irq_info_common_init(struct irq_info *info, + unsigned irq, + enum xen_irq_type type, + unsigned short evtchn, + unsigned short cpu) { - return (struct irq_info) { .type = IRQT_UNBOUND }; + + BUG_ON(info->type != IRQT_UNBOUND && info->type != type); + + info->type = type; + info->irq = irq; + info->evtchn = evtchn; + info->cpu = cpu; + + evtchn_to_irq[evtchn] = irq; } -static struct irq_info mk_evtchn_info(unsigned short evtchn) +static void xen_irq_info_evtchn_init(unsigned irq, + unsigned short evtchn) { - return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, - .cpu = 0 }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); } -static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) +static void xen_irq_info_ipi_init(unsigned cpu, + unsigned irq, + unsigned short evtchn, + enum ipi_vector ipi) { - return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, - .cpu = 0, .u.ipi = ipi }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); + + info->u.ipi = ipi; + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; } -static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) +static void xen_irq_info_virq_init(unsigned cpu, + unsigned irq, + unsigned short evtchn, + unsigned short virq) { - return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, - .cpu = 0, .u.virq = virq }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); + + info->u.virq = virq; + + per_cpu(virq_to_irq, cpu)[virq] = irq; } -static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq, - unsigned short gsi, unsigned short vector) +static void xen_irq_info_pirq_init(unsigned irq, + unsigned short evtchn, + unsigned short pirq, + unsigned short gsi, + unsigned short vector, ++++ uint16_t domid, + unsigned char flags) { - return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .cpu = 0, - .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); + + info->u.pirq.pirq = pirq; + info->u.pirq.gsi = gsi; + info->u.pirq.vector = vector; ++++ info->u.pirq.domid = domid; + info->u.pirq.flags = flags; } /* @@@@@@ -390,102 -389,92 -387,102 -387,102 
-376,93 +392,92 @@@@@@ static void unmask_evtchn(int port put_cpu(); } -static int get_nr_hw_irqs(void) +static void xen_irq_init(unsigned irq) { - int ret = 1; + struct irq_info *info; + struct irq_desc *desc = irq_to_desc(irq); -#ifdef CONFIG_X86_IO_APIC - ret = get_nr_irqs_gsi(); +#ifdef CONFIG_SMP + /* By default all event channels notify CPU#0. */ + cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); #endif - return ret; -} + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + panic("Unable to allocate metadata for IRQ%d\n", irq); -static int find_unbound_pirq(int type) -{ - int rc, i; - struct physdev_get_free_pirq op_get_free_pirq; - op_get_free_pirq.type = type; + info->type = IRQT_UNBOUND; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); - if (!rc) - return op_get_free_pirq.pirq; + irq_set_handler_data(irq, info); - for (i = 0; i < nr_irqs; i++) { - if (pirq_to_irq[i] < 0) - return i; - } - return -1; + list_add_tail(&info->list, &xen_irq_list_head); } -static int find_unbound_irq(void) +static int __must_check xen_allocate_irq_dynamic(void) { - struct irq_data *data; - int irq, res; - int bottom = get_nr_hw_irqs(); - int top = nr_irqs-1; - - if (bottom == nr_irqs) - goto no_irqs; + int first = 0; + int irq; - /* This loop starts from the top of IRQ space and goes down. - * We need this b/c if we have a PCI device in a Xen PV guest - * we do not have an IO-APIC (though the backend might have them) - * mapped in. To not have a collision of physical IRQs with the Xen - * event channels start at the top of the IRQ space for virtual IRQs. +#ifdef CONFIG_X86_IO_APIC + /* + * For an HVM guest or domain 0 which see "real" (emulated or - * actual repectively) GSIs we allocate dynamic IRQs + + * actual respectively) GSIs we allocate dynamic IRQs + * e.g. those corresponding to event channels or MSIs + * etc. from the range above those "real" GSIs to avoid + * collisions. */ - for (irq = top; irq > bottom; irq--) { - data = irq_get_irq_data(irq); - /* only 15->0 have init'd desc; handle irq > 16 */ - if (!data) - break; - if (data->chip == &no_irq_chip) - break; - if (data->chip != &xen_dynamic_chip) - continue; - if (irq_info[irq].type == IRQT_UNBOUND) - return irq; - } - - if (irq == bottom) - goto no_irqs; + if (xen_initial_domain() || xen_hvm_domain()) + first = get_nr_irqs_gsi(); +#endif - res = irq_alloc_desc_at(irq, -1); + irq = irq_alloc_desc_from(first, -1); - if (WARN_ON(res != irq)) - return -1; + xen_irq_init(irq); return irq; - -no_irqs: - panic("No available IRQ to bind to: increase nr_irqs!\n"); } -static bool identity_mapped_irq(unsigned irq) +static int __must_check xen_allocate_irq_gsi(unsigned gsi) { - /* identity map all the hardware irqs */ - return irq < get_nr_hw_irqs(); + int irq; + + /* + * A PV guest has no concept of a GSI (since it has no ACPI + * nor access to/knowledge of the physical APICs). Therefore + * all IRQs are dynamically allocated from the entire IRQ + * space. + */ + if (xen_pv_domain() && !xen_initial_domain()) + return xen_allocate_irq_dynamic(); + + /* Legacy IRQ descriptors are already allocated by the arch. 
*/ + if (gsi < NR_IRQS_LEGACY) + irq = gsi; + else + irq = irq_alloc_desc_at(gsi, -1); + + xen_irq_init(irq); + + return irq; } -static void pirq_unmask_notify(int irq) +static void xen_free_irq(unsigned irq) { - struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; + struct irq_info *info = irq_get_handler_data(irq); - if (unlikely(pirq_needs_eoi(irq))) { - int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); - WARN_ON(rc); - } + list_del(&info->list); + + irq_set_handler_data(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq < NR_IRQS_LEGACY) + return; + + irq_free_desc(irq); } - -- static void pirq_unmask_notify(int irq) - -- { - -- struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) }; - -- - -- if (unlikely(pirq_needs_eoi(irq))) { - -- int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); - -- WARN_ON(rc); - -- } - -- } - -- static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@@@@@ -509,7 -498,30 -506,7 -506,7 -486,7 +501,30 @@@@@@ static bool probing_irq(int irq return desc && desc->action == NULL; } -static unsigned int startup_pirq(unsigned int irq) + +++static void eoi_pirq(struct irq_data *data) + +++{ + +++ int evtchn = evtchn_from_irq(data->irq); + +++ struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; + +++ int rc = 0; + +++ + +++ irq_move_irq(data); + +++ + +++ if (VALID_EVTCHN(evtchn)) + +++ clear_evtchn(evtchn); + +++ + +++ if (pirq_needs_eoi(data->irq)) { + +++ rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + +++ WARN_ON(rc); + +++ } + +++} + +++ + +++static void mask_ack_pirq(struct irq_data *data) + +++{ + +++ disable_dynirq(data); + +++ eoi_pirq(data); + +++} + +++ +static unsigned int __startup_pirq(unsigned int irq) { struct evtchn_bind_pirq bind_pirq; struct irq_info *info = info_for_irq(irq); @@@@@@ -575,25 -587,14 -572,25 -572,25 -546,42 +590,14 @@@@@@ static void shutdown_pirq(struct irq_da info->evtchn = 0; } -static void enable_pirq(unsigned int irq) +static void enable_pirq(struct irq_data *data) { - startup_pirq(irq); + startup_pirq(data); } -static void disable_pirq(unsigned int irq) +static void disable_pirq(struct irq_data *data) { - ---} - --- - -- static void ack_pirq(struct irq_data *data) -static void ack_pirq(unsigned int irq) - ---{ - -- int evtchn = evtchn_from_irq(data->irq); - int evtchn = evtchn_from_irq(irq); - --- - -- irq_move_irq(data); - move_native_irq(irq); - --- - --- if (VALID_EVTCHN(evtchn)) { - --- mask_evtchn(evtchn); - --- clear_evtchn(evtchn); - } -} - -static void end_pirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - struct irq_desc *desc = irq_to_desc(irq); - - if (WARN_ON(!desc)) - return; - - if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) == - (IRQ_DISABLED|IRQ_PENDING)) { - shutdown_pirq(irq); - } else if (VALID_EVTCHN(evtchn)) { - unmask_evtchn(evtchn); - pirq_unmask_notify(irq); - --- } + +++ disable_dynirq(data); } static int find_irq_by_gsi(unsigned gsi) @@@@@@ -638,13 -639,10 -635,13 -635,13 -638,18 +642,10 @@@@@@ int xen_bind_pirq_gsi_to_irq(unsigned g goto out; /* XXX need refcount? */ } - /* If we are a PV guest, we don't have GSIs (no ACPI passed). 
Therefore - * we are using the !xen_initial_domain() to drop in the function.*/ - if (identity_mapped_irq(gsi) || (!xen_initial_domain() && - xen_pv_domain())) { - irq = gsi; - irq_alloc_desc_at(irq, -1); - } else - irq = find_unbound_irq(); - - set_irq_chip_and_handler_name(irq, &xen_pirq_chip, - handle_level_irq, name); + irq = xen_allocate_irq_gsi(gsi); + if (irq < 0) + goto out; - -- irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, - -- name); - -- irq_op.irq = irq; irq_op.vector = 0; @@@@@@ -658,9 -656,35 -655,9 -655,9 -663,10 +659,35 @@@@@@ goto out; } --- xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, - irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector); - irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; - pirq_to_irq[pirq] = irq; ++++ xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, + shareable ? PIRQ_SHAREABLE : 0); + + +++ pirq_query_unmask(irq); + +++ /* We try to use the handler with the appropriate semantic for the + +++ * type of interrupt: if the interrupt doesn't need an eoi + +++ * (pirq_needs_eoi returns false), we treat it like an edge + +++ * triggered interrupt so we use handle_edge_irq. + +++ * As a matter of fact this only happens when the corresponding + +++ * physical interrupt is edge triggered or an msi. + +++ * + +++ * On the other hand if the interrupt needs an eoi (pirq_needs_eoi + +++ * returns true) we treat it like a level triggered interrupt so we + +++ * use handle_fasteoi_irq like the native code does for this kind of + +++ * interrupts. + +++ * Depending on the Xen version, pirq_needs_eoi might return true + +++ * not only for level triggered interrupts but for edge triggered + +++ * interrupts too. In any case Xen always honors the eoi mechanism, + +++ * not injecting any more pirqs of the same kind if the first one + +++ * hasn't received an eoi yet. Therefore using the fasteoi handler + +++ * is the right choice either way. + +++ */ + +++ if (pirq_needs_eoi(irq)) + +++ irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + +++ handle_fasteoi_irq, name); + +++ else + +++ irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + +++ handle_edge_irq, name); + ++ out: spin_unlock(&irq_mapping_update_lock); @@@@@@ -668,25 -692,24 -665,24 -665,24 -674,61 +695,25 @@@@@@ } #ifdef CONFIG_PCI_MSI -#include -#include "../pci/msi.h" - -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc) +int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) { - spin_lock(&irq_mapping_update_lock); - - if (alloc & XEN_ALLOC_IRQ) { - *irq = find_unbound_irq(); - if (*irq == -1) - goto out; - } - - if (alloc & XEN_ALLOC_PIRQ) { - *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI); - if (*pirq == -1) - goto out; - } + int rc; + struct physdev_get_free_pirq op_get_free_pirq; - set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, - handle_level_irq, name); + op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); - irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); - pirq_to_irq[*pirq] = *irq; + WARN_ONCE(rc == -ENOSYS, + "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); -out: - spin_unlock(&irq_mapping_update_lock); + return rc ? 
-1 : op_get_free_pirq.pirq; } -int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) +int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, --- int pirq, int vector, const char *name) ++++ int pirq, int vector, const char *name, ++++ domid_t domid) { - int irq = -1; - struct physdev_map_pirq map_irq; - int rc; - int pos; - u32 table_offset, bir; - - memset(&map_irq, 0, sizeof(map_irq)); - map_irq.domid = DOMID_SELF; - map_irq.type = MAP_PIRQ_TYPE_MSI; - map_irq.index = -1; - map_irq.pirq = -1; - map_irq.bus = dev->bus->number; - map_irq.devfn = dev->devfn; - - if (type == PCI_CAP_ID_MSIX) { - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); - - pci_read_config_dword(dev, msix_table_offset_reg(pos), - &table_offset); - bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); - - map_irq.table_base = pci_resource_start(dev, bir); - map_irq.entry_nr = msidesc->msi_attrib.entry_nr; - } + int irq, ret; spin_lock(&irq_mapping_update_lock); @@@@@@ -694,13 -717,13 -690,13 -690,13 -737,21 +721,13 @@@@@@ if (irq == -1) goto out; - -- irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, - -- name); - rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); - if (rc) { - printk(KERN_WARNING "xen map irq failed %d\n", rc); - - irq_free_desc(irq); - - irq = -1; - goto out; - } - irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index); - - set_irq_chip_and_handler_name(irq, &xen_pirq_chip, - handle_level_irq, - (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi"); + +++ irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, + +++ name); --- xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0); ++++ xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); + ret = irq_set_msi_desc(irq, msidesc); + if (ret < 0) + goto error_irq; out: spin_unlock(&irq_mapping_update_lock); return irq; @@@@@@ -748,34 -764,28 -737,28 -737,28 -790,21 +775,34 @@@@@@ out return rc; } -int xen_vector_from_irq(unsigned irq) +int xen_irq_from_pirq(unsigned pirq) { - return vector_from_irq(irq); -} + int irq; -int xen_gsi_from_irq(unsigned irq) -{ - return gsi_from_irq(irq); + struct irq_info *info; + + spin_lock(&irq_mapping_update_lock); + + list_for_each_entry(info, &xen_irq_list_head, list) { + if (info == NULL || info->type != IRQT_PIRQ) + continue; + irq = info->irq; + if (info->u.pirq.pirq == pirq) + goto out; + } + irq = -1; +out: + spin_unlock(&irq_mapping_update_lock); + + return irq; } -int xen_irq_from_pirq(unsigned pirq) ++++ ++++int xen_pirq_from_irq(unsigned irq) +++ { - return pirq_to_irq[pirq]; ++++ return pirq_from_irq(irq); +++ } - ++++EXPORT_SYMBOL_GPL(xen_pirq_from_irq); int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@@@@@ -785,17 -795,17 -768,17 -768,17 -814,15 +812,17 @@@@@@ irq = evtchn_to_irq[evtchn]; if (irq == -1) { - irq = find_unbound_irq(); + irq = xen_allocate_irq_dynamic(); + if (irq == -1) + goto out; - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_fasteoi_irq, "event"); + irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, - -- handle_fasteoi_irq, "event"); + +++ handle_edge_irq, "event"); - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_evtchn_info(evtchn); + xen_irq_info_evtchn_init(irq, evtchn); } +out: spin_unlock(&irq_mapping_update_lock); return irq; @@@@@@ -929,11 -939,11 -912,12 -912,11 -945,10 +956,11 @@@@@@ int bind_evtchn_to_irqhandler(unsigned unsigned long irqflags, const char *devname, void *dev_id) { - - unsigned int irq; - - int retval; + + int irq, retval; irq = 
bind_evtchn_to_irq(evtchn); + if (irq < 0) + return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); if (retval != 0) { unbind_from_irq(irq); @@@@@@ -971,11 -981,11 -955,12 -954,11 -963,10 +998,11 @@@@@@ int bind_virq_to_irqhandler(unsigned in irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - - unsigned int irq; - - int retval; + + int irq, retval; irq = bind_virq_to_irq(virq, cpu); + if (irq < 0) + return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); if (retval != 0) { unbind_from_irq(irq); @@@@@@ -1145,60 -1155,57 -1130,60 -1128,60 -1126,20 +1172,57 @@@@@@ static void __xen_evtchn_do_upcall(void wmb(); #endif pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); - while (pending_words != 0) { + + start_word_idx = __this_cpu_read(current_word_idx); + start_bit_idx = __this_cpu_read(current_bit_idx); + + word_idx = start_word_idx; + + for (i = 0; pending_words != 0; i++) { unsigned long pending_bits; - int word_idx = __ffs(pending_words); - pending_words &= ~(1UL << word_idx); + unsigned long words; + + words = MASK_LSBS(pending_words, word_idx); + + /* + * If we masked out all events, wrap to beginning. + */ + if (words == 0) { + word_idx = 0; + bit_idx = 0; + continue; + } + word_idx = __ffs(words); + + pending_bits = active_evtchns(cpu, s, word_idx); + bit_idx = 0; /* usually scan entire word from start */ + if (word_idx == start_word_idx) { + /* We scan the starting word in two parts */ + if (i == 0) + /* 1st time: start in the middle */ + bit_idx = start_bit_idx; + else + /* 2nd time: mask bits done already */ + bit_idx &= (1UL << start_bit_idx) - 1; + } - while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { - int bit_idx = __ffs(pending_bits); - int port = (word_idx * BITS_PER_LONG) + bit_idx; - int irq = evtchn_to_irq[port]; + do { + unsigned long bits; + int port, irq; struct irq_desc *desc; - mask_evtchn(port); - clear_evtchn(port); + bits = MASK_LSBS(pending_bits, bit_idx); + + /* If we masked out all events, move on. */ + if (bits == 0) + break; + + bit_idx = __ffs(bits); + + /* Process port. */ + port = (word_idx * BITS_PER_LONG) + bit_idx; + irq = evtchn_to_irq[port]; - -- mask_evtchn(port); - -- clear_evtchn(port); - -- if (irq != -1) { desc = irq_to_desc(irq); if (desc) @@@@@@ -1350,19 -1357,25 -1335,19 -1333,19 -1273,19 +1374,25 @@@@@@ static void disable_dynirq(struct irq_d mask_evtchn(evtchn); } -static void ack_dynirq(unsigned int irq) +static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(irq); + int evtchn = evtchn_from_irq(data->irq); - -- irq_move_masked_irq(data); - move_masked_irq(irq); + +++ irq_move_irq(data); if (VALID_EVTCHN(evtchn)) - --- unmask_evtchn(evtchn); + +++ clear_evtchn(evtchn); + ++ } + ++ -static int retrigger_dynirq(unsigned int irq) + +++static void mask_ack_dynirq(struct irq_data *data) + ++ { - int evtchn = evtchn_from_irq(irq); + +++ disable_dynirq(data); + +++ ack_dynirq(data); +} + +static int retrigger_dynirq(struct irq_data *data) +{ + int evtchn = evtchn_from_irq(data->irq); struct shared_info *sh = HYPERVISOR_shared_info; int ret = 0; @@@@@@ -1519,22 -1532,10 -1504,10 -1502,10 -1442,10 +1549,22 @@@@@@ void xen_poll_irq(int irq xen_poll_irq_timeout(irq, 0 /* no timeout */); } ++++/* Check whether the IRQ line is shared with other guests. 
*/ ++++int xen_test_irq_shared(int irq) ++++{ ++++ struct irq_info *info = info_for_irq(irq); ++++ struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; ++++ ++++ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) ++++ return 0; ++++ return !(irq_status.flags & XENIRQSTAT_shared); ++++} ++++EXPORT_SYMBOL_GPL(xen_test_irq_shared); ++++ void xen_irq_resume(void) { - unsigned int cpu, irq, evtchn; - struct irq_desc *desc; + unsigned int cpu, evtchn; + struct irq_info *info; init_evtchn_cpu_bindings(); @@@@@@ -1558,34 -1559,37 -1531,34 -1529,34 -1486,35 +1588,37 @@@@@@ } static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", + .name = "xen-dyn", - .disable = disable_dynirq, - .mask = disable_dynirq, - .unmask = enable_dynirq, + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, - -- .irq_eoi = ack_dynirq, - .eoi = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + +++ .irq_ack = ack_dynirq, + +++ .irq_mask_ack = mask_ack_dynirq, + +++ + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_pirq_chip __read_mostly = { - .name = "xen-pirq", - - .startup = startup_pirq, - .shutdown = shutdown_pirq, + .name = "xen-pirq", - .enable = enable_pirq, - .unmask = enable_pirq, + .irq_startup = startup_pirq, + .irq_shutdown = shutdown_pirq, - -- + .irq_enable = enable_pirq, - -- .irq_unmask = enable_pirq, - -- + .irq_disable = disable_pirq, - -- .irq_mask = disable_pirq, - -- .irq_ack = ack_pirq, - .disable = disable_pirq, - .mask = disable_pirq, + +++ .irq_mask = disable_dynirq, + +++ .irq_unmask = enable_dynirq, + ++ - .ack = ack_pirq, - .end = end_pirq, + +++ .irq_ack = eoi_pirq, + +++ .irq_eoi = eoi_pirq, + +++ .irq_mask_ack = mask_ack_pirq, - .set_affinity = set_affinity_irq, + .irq_set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, + .irq_retrigger = retrigger_dynirq, }; static struct irq_chip xen_percpu_chip __read_mostly = {
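/*
 * The upcall loop in __xen_evtchn_do_upcall above does not restart its
 * scan of the pending-event bitmap at bit 0 on every interrupt: it
 * resumes from current_word_idx/current_bit_idx, just past the last
 * port it serviced, so a busy low-numbered event channel cannot starve
 * higher-numbered ones.  What follows is a minimal standalone sketch of
 * that scan policy, an illustration rather than the kernel code itself:
 * it assumes 64-bit longs and a single word of pending bits, MASK_LSBS
 * mirrors the kernel macro of the same name, __builtin_ctzl stands in
 * for __ffs(), and the example pending set and resume index are
 * invented.
 *
 *	#include <stdio.h>
 *
 *	// kernel's MASK_LSBS(w, x): drop the x least significant bits,
 *	// keeping only bits at index >= x
 *	#define MASK_LSBS(w, x) ((w) & ~((1UL << (x)) - 1UL))
 *
 *	int main(void)
 *	{
 *		// ports 0, 2, 5 and 7 pending; resume just past port 2
 *		unsigned long pending = 0xa5UL;
 *		unsigned int bit_idx = 3;
 *		unsigned long bits;
 *
 *		// only ports at or above the resume point are eligible on
 *		// this pass, mirroring the "start in the middle" branch of
 *		// the upcall loop
 *		while (bit_idx < 64 &&
 *		       (bits = MASK_LSBS(pending, bit_idx)) != 0) {
 *			bit_idx = (unsigned int)__builtin_ctzl(bits);
 *			printf("servicing port %u\n", bit_idx);
 *			pending &= ~(1UL << bit_idx);	// event handled
 *			bit_idx++;	// do not rescan the port just done
 *		}
 *
 *		// ports 0 and 2 remain pending; the kernel picks them up
 *		// when word_idx/bit_idx wrap to the start on the next pass
 *		printf("left pending: %#lx\n", pending);
 *		return 0;
 *	}
 *
 * Note that with this merge the upcall loop no longer masks and clears
 * each port itself; clear_evtchn() moved into the per-IRQ ack paths
 * (eoi_pirq(), ack_dynirq()), which is what the "xen: do not clear and
 * mask evtchns in __xen_evtchn_do_upcall" commit in the branch list
 * above refers to.
 */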