These are the patches from an earlier submitted series that haven't been
acked by anyone so far.

1: VT-d: enable for multi-vector MSI
2: x86: enable multi-vector MSI
3: pciif: add multi-vector-MSI command

Signed-off-by: Jan Beulich <jbeulich@suse.com>
The main change being to make alloc_remap_entry() capable of allocating
a block of entries.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
 }
 
 /*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
  * Need hold iremap_lock, and setup returned entry before releasing lock.
  */
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
 {
     struct iremap_entry *iremap_entries = NULL;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
-    int i;
+    unsigned int i, found;
 
     ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
 
-    for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
     {
         struct iremap_entry *p;
         if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
         else
             p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
-        if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+        if ( p->lo_val || p->hi_val ) /* not a free entry */
+            found = 0;
+        else if ( ++found == nr )
             break;
     }
 
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
         unmap_vtd_domain_page(iremap_entries);
 
     if ( i < IREMAP_ENTRY_NR )
-        ir_ctrl->iremap_num++;
+        ir_ctrl->iremap_num += nr;
     return i;
 }
 
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
     index = apic_pin_2_ir_idx[apic][ioapic_pin];
     if ( index < 0 )
     {
-        index = alloc_remap_entry(iommu);
+        index = alloc_remap_entry(iommu, 1);
         if ( index < IREMAP_ENTRY_NR )
             apic_pin_2_ir_idx[apic][ioapic_pin] = index;
     }
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
 }
 
 static int remap_entry_to_msi_msg(
-    struct iommu *iommu, struct msi_msg *msg)
+    struct iommu *iommu, struct msi_msg *msg, unsigned int index)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
-    index = (remap_rte->address_lo.index_15 << 15) |
+    index += (remap_rte->address_lo.index_15 << 15) |
             remap_rte->address_lo.index_0_14;
 
-    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+    if ( index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct iremap_entry new_ire;
     struct msi_msg_remap_entry *remap_rte;
-    int index;
+    unsigned int index, i, nr = 1;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    remap_rte = (struct msi_msg_remap_entry *) msg;
+    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+        nr = msi_desc->msi.nvec;
+
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
     if ( msg == NULL )
     {
-        /* Free specified unused IRTE */
-        free_remap_entry(iommu, msi_desc->remap_index);
+        /* Free specified unused IRTEs */
+        for ( i = 0; i < nr; ++i )
+            free_remap_entry(iommu, msi_desc->remap_index + i);
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return 0;
     }
 
     if ( msi_desc->remap_index < 0 )
     {
-        /*
-         * TODO: Multiple-vector MSI requires allocating multiple continuous
-         * entries and configuring addr/data of msi_msg in different way. So
-         * alloca_remap_entry will be changed if enabling multiple-vector MSI
-         * in future.
-         */
-        index = alloc_remap_entry(iommu);
-        msi_desc->remap_index = index;
+        index = alloc_remap_entry(iommu, nr);
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = index + i;
     }
     else
         index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
                 "%s: intremap index (%d) is larger than"
                 " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
-        msi_desc->remap_index = -1;
+        for ( i = 0; i < nr; ++i )
+            msi_desc[i].remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
     new_ire.lo.p = 1;    /* finally, set present bit */
 
     /* now construct new MSI/MSI-X rte entry */
+    remap_rte = (struct msi_msg_remap_entry *)msg;
     remap_rte->address_lo.dontcare = 0;
-    remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
-    remap_rte->address_lo.index_0_14 = index & 0x7fff;
+    i = index;
+    if ( !nr )
+        i -= msi_desc->msi_attrib.entry_nr;
+    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+    remap_rte->address_lo.index_0_14 = i & 0x7fff;
     remap_rte->address_lo.SHV = 1;
     remap_rte->address_lo.format = 1;
 
     remap_rte->address_hi = 0;
-    remap_rte->data = 0;
+    remap_rte->data = index - i;
 
     memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
     iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
     if ( drhd )
-        remap_entry_to_msi_msg(drhd->iommu, msg);
+        remap_entry_to_msi_msg(drhd->iommu, msg,
+                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+                               ? msi_desc->msi_attrib.entry_nr : 0);
 }
 
 int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
         return 0;
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
-    msi_desc->remap_index = alloc_remap_entry(iommu);
+    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
     if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
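[The allocation scan at the heart of this patch is easiest to see in
isolation. Below is a minimal, self-contained C sketch of the pattern the
patched alloc_remap_entry() uses; the names and the table representation are
invented for illustration, this is not the Xen code itself. The idea: scan
linearly, reset the run counter whenever a used slot interrupts the run, and
stop once nr adjacent free slots have been seen.]

#include <stdio.h>

#define TABLE_NR 256

/* Zero means free; anything else means the slot is in use (this stands
 * in for the lo_val/hi_val test on real IRTEs). */
static unsigned char table[TABLE_NR];

/* Returns TABLE_NR when no block of nr contiguous free slots exists,
 * otherwise the index at which the run of nr free slots completes. */
static unsigned int alloc_block(unsigned int nr)
{
    unsigned int i, found = 0;

    for ( i = 0; i < TABLE_NR; i++ )
    {
        if ( table[i] )             /* not a free entry */
            found = 0;
        else if ( ++found == nr )
            break;                  /* slots i - nr + 1 .. i are free */
    }
    return i;
}

int main(void)
{
    table[2] = 1;                   /* occupy slot 2 */
    printf("%u\n", alloc_block(4)); /* prints 6: the run is slots 3..6 */
    return 0;
}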
This implies
- extending the public interface to have a way to request a block of
  MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
  note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
     return -ENOSPC;
 }
 
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+    unsigned int i, found = 0;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+        if ( is_free_pirq(d, pirq_info(d, i)) )
+        {
+            pirq_get_info(d, i);
+            if ( ++found == nr )
+                return i;
+        }
+        else
+            found = 0;
+
+    return -ENOSPC;
+}
+
 int map_domain_pirq(
     struct domain *d, int pirq, int irq, int type, void *data)
 {
@@ -1918,11 +1937,12 @@ int map_domain_pirq(
 
     desc = irq_to_desc(irq);
 
-    if ( type == MAP_PIRQ_TYPE_MSI )
+    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
         struct msi_desc *msi_desc;
         struct pci_dev *pdev;
+        unsigned int nr = 0;
 
         ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -1933,7 +1953,14 @@ int map_domain_pirq(
         pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
+        {
+            if ( ret > 0 )
+            {
+                msi->entry_nr = ret;
+                ret = -ENFILE;
+            }
             goto done;
+        }
 
         spin_lock_irqsave(&desc->lock, flags);
 
@@ -1947,25 +1974,73 @@ int map_domain_pirq(
             goto done;
         }
 
-        ret = setup_msi_irq(desc, msi_desc);
-        if ( ret )
+        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
         {
+            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+                 !desc->arch.used_vectors )
+            {
+                desc->arch.used_vectors = &pdev->arch.used_vectors;
+                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+                {
+                    int vector = desc->arch.vector;
+
+                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
+                    set_bit(vector, desc->arch.used_vectors);
+                }
+            }
+            if ( type == MAP_PIRQ_TYPE_MSI ||
+                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+                 ++nr == msi->entry_nr )
+                break;
+
+            set_domain_irq_pirq(d, irq, info);
             spin_unlock_irqrestore(&desc->lock, flags);
-            pci_disable_msi(msi_desc);
-            goto done;
+
+            info = NULL;
+            irq = create_irq(NUMA_NO_NODE);
+            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+                           : irq;
+            if ( ret )
+                break;
+            msi_desc[nr].irq = irq;
+
+            if ( irq_permit_access(d, irq) != 0 )
+                printk(XENLOG_G_WARNING
+                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+                       d->domain_id, irq, pirq);
+
+            desc = irq_to_desc(irq);
+            spin_lock_irqsave(&desc->lock, flags);
+
+            if ( desc->handler != &no_irq_type )
+            {
+                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+                        d->domain_id, irq, pirq + nr, desc->handler->typename);
+                ret = -EBUSY;
+                break;
+            }
         }
 
-        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
-             && !desc->arch.used_vectors )
+        if ( ret )
         {
-            desc->arch.used_vectors = &pdev->arch.used_vectors;
-            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+            spin_unlock_irqrestore(&desc->lock, flags);
+            while ( nr-- )
             {
-                int vector = desc->arch.vector;
-                ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
-                set_bit(vector, desc->arch.used_vectors);
+                if ( irq >= 0 )
+                {
+                    if ( irq_deny_access(d, irq) )
+                        printk(XENLOG_G_ERR
+                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+                               d->domain_id, irq, pirq);
+                    destroy_irq(irq);
+                }
+                if ( info )
+                    cleanup_domain_irq_pirq(d, irq, info);
+                info = pirq_info(d, pirq + nr);
+                irq = info->arch.irq;
             }
+            pci_disable_msi(msi_desc);
+            goto done;
         }
 
         set_domain_irq_pirq(d, irq, info);
@@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
 {
     unsigned long flags;
     struct irq_desc *desc;
-    int irq, ret = 0;
+    int irq, ret = 0, rc;
+    unsigned int i, nr = 1;
     bool_t forced_unbind;
     struct pirq *info;
     struct msi_desc *msi_desc = NULL;
@@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
 
     desc = irq_to_desc(irq);
     msi_desc = desc->msi_desc;
+    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        if ( msi_desc->msi_attrib.entry_nr )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: trying to unmap secondary MSI pirq %d\n",
+                   d->domain_id, pirq);
+            ret = -EBUSY;
+            goto done;
+        }
+        nr = msi_desc->msi.nvec;
+    }
 
     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
     if ( ret )
@@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
-    if ( !forced_unbind )
-        clear_domain_irq_pirq(d, irq, info);
-    else
+    for ( i = 0; ; )
     {
-        info->arch.irq = -irq;
-        radix_tree_replace_slot(
-            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
-            radix_tree_int_to_ptr(-pirq));
+        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+        if ( !forced_unbind )
+            clear_domain_irq_pirq(d, irq, info);
+        else
+        {
+            info->arch.irq = -irq;
+            radix_tree_replace_slot(
+                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+                radix_tree_int_to_ptr(-pirq));
+        }
+
+        if ( msi_desc )
+        {
+            desc->handler = &no_irq_type;
+            desc->msi_desc = NULL;
+        }
+
+        if ( ++i == nr )
+            break;
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+            cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + i);
+            ret = rc;
+        }
+
+        do {
+            info = pirq_info(d, pirq + i);
+            if ( info && (irq = info->arch.irq) > 0 )
+                break;
+            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+                   d->domain_id, pirq + i);
+        } while ( ++i < nr );
+
+        if ( i == nr )
+        {
+            desc = NULL;
+            break;
+        }
+
+        desc = irq_to_desc(irq);
+        BUG_ON(desc->msi_desc != msi_desc + i);
+
+        spin_lock_irqsave(&desc->lock, flags);
     }
 
-    if ( msi_desc )
+    if ( desc )
     {
-        desc->handler = &no_irq_type;
-        desc->msi_desc = NULL;
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+            cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + nr - 1);
+            ret = rc;
+        }
    }
 
-    spin_unlock_irqrestore(&desc->lock, flags);
     if (msi_desc)
         msi_free_irq(msi_desc);
 
-    if ( !forced_unbind )
-        cleanup_domain_irq_pirq(d, irq, info);
-
-    ret = irq_deny_access(d, irq);
-    if ( ret )
-        printk(XENLOG_G_ERR
-               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
-               d->domain_id, irq, pirq);
-
  done:
     return ret;
 }
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
         u8 bus = dev->bus;
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
+        int nr = entry->msi_attrib.entry_nr;
+
+        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+        if ( nr )
+            return 0;
 
         pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
@@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
             u8 func = PCI_FUNC(entry->dev->devfn);
 
             mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
-            mask_bits &= ~(1);
-            mask_bits |= flag;
+            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
             pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
         }
         break;
@@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
     case PCI_CAP_ID_MSI:
         if (!entry->dev || !entry->msi_attrib.maskbit)
             break;
-        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
-                               PCI_SLOT(entry->dev->devfn),
-                               PCI_FUNC(entry->dev->devfn),
-                               entry->msi.mpos) & 1;
+        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
                                PCI_SLOT(entry->dev->devfn),
                                PCI_FUNC(entry->dev->devfn),
                                entry->msi.mpos) >>
+                entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
     .set_affinity = set_msi_affinity
 };
 
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
 {
     struct msi_desc *entry;
 
-    entry = xmalloc(struct msi_desc);
+    entry = xmalloc_array(struct msi_desc, nr);
     if ( !entry )
         return NULL;
 
     INIT_LIST_HEAD(&entry->list);
-    entry->dev = NULL;
-    entry->remap_index = -1;
+    while ( nr-- )
+    {
+        entry[nr].dev = NULL;
+        entry[nr].remap_index = -1;
+    }
 
     return entry;
 }
@@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
 
 int msi_free_irq(struct msi_desc *entry)
 {
-    destroy_irq(entry->irq);
+    unsigned int nr = entry->msi.nvec;
+
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
         start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
         msix_put_fixmap(entry->dev, virt_to_fix(start));
+        nr = 1;
     }
 
-    /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_intremap )
-        iommu_update_ire_from_msi(entry, NULL);
+    while ( nr-- )
+    {
+        destroy_irq(entry[nr].irq);
+
+        /* Free the unused IRTE if intr remap enabled */
+        if ( iommu_intremap )
+            iommu_update_ire_from_msi(entry + nr, NULL);
+    }
 
     list_del(&entry->list);
     xfree(entry);
@@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
  **/
 static int msi_capability_init(struct pci_dev *dev,
                                int irq,
-                               struct msi_desc **desc)
+                               struct msi_desc **desc,
+                               unsigned int nvec)
 {
     struct msi_desc *entry;
     int pos;
-    unsigned int maxvec, mpos;
+    unsigned int i, maxvec, mpos;
     u16 control, seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
@@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
     maxvec = multi_msi_capable(control);
+    if ( nvec > maxvec )
+        return maxvec;
     control &= ~PCI_MSI_FLAGS_QSIZE;
+    multi_msi_enable(control, nvec);
 
     /* MSI Entry Initialization */
     msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
 
-    entry = alloc_msi_entry();
+    entry = alloc_msi_entry(nvec);
     if ( !entry )
         return -ENOMEM;
 
-    entry->msi_attrib.type = PCI_CAP_ID_MSI;
-    entry->msi_attrib.is_64 = is_64bit_address(control);
-    entry->msi_attrib.entry_nr = 0;
-    entry->msi_attrib.maskbit = is_mask_bit_support(control);
-    entry->msi_attrib.masked = 1;
-    entry->msi_attrib.pos = pos;
     mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
-    entry->msi.nvec = 1;
+    for ( i = 0; i < nvec; ++i )
+    {
+        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+        entry[i].msi_attrib.is_64 = is_64bit_address(control);
+        entry[i].msi_attrib.entry_nr = i;
+        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+        entry[i].msi_attrib.masked = 1;
+        entry[i].msi_attrib.pos = pos;
+        if ( entry[i].msi_attrib.maskbit )
+            entry[i].msi.mpos = mpos;
+        entry[i].msi.nvec = 0;
+        entry[i].dev = dev;
+    }
+    entry->msi.nvec = nvec;
     entry->irq = irq;
-    if ( is_mask_bit_support(control) )
-        entry->msi.mpos = mpos;
-    entry->dev = dev;
     if ( entry->msi_attrib.maskbit )
     {
         u32 maskbits;
@@ -693,7 +717,7 @@ static int msix_capability_init(struct p
 
     if ( desc )
     {
-        entry = alloc_msi_entry();
+        entry = alloc_msi_entry(1);
         if ( !entry )
             return -ENOMEM;
         ASSERT(msi);
@@ -851,7 +875,6 @@ static int msix_capability_init(struct p
 
 static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    int status;
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
@@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
         pci_disable_msi(old_desc);
     }
 
-    status = msi_capability_init(pdev, msi->irq, desc);
-    return status;
+    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
 }
 
 static void __pci_disable_msi(struct msi_desc *entry)
@@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
 
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
+        unsigned int i = 0, nr = 1;
+
         irq = entry->irq;
         desc = &irq_desc[irq];
 
@@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
 
         if (desc->msi_desc != entry)
        {
+    bogus:
             dprintk(XENLOG_ERR,
-                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                    PCI_FUNC(pdev->devfn));
+                    PCI_FUNC(pdev->devfn), i);
             spin_unlock_irqrestore(&desc->lock, flags);
             return -EINVAL;
         }
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
             msi_set_enable(pdev, 0);
+            nr = entry->msi.nvec;
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 0);
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
 
-        msi_set_mask_bit(desc, entry->msi_attrib.masked);
+        for ( i = 0; ; )
+        {
+            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+            spin_unlock_irqrestore(&desc->lock, flags);
+
+            if ( !--nr )
+                break;
+
+            desc = &irq_desc[entry[++i].irq];
+            spin_lock_irqsave(&desc->lock, flags);
+            if ( desc->msi_desc != entry + i )
+                goto bogus;
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
+            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+                                          PCI_SLOT(pdev->devfn),
+                                          PCI_FUNC(pdev->devfn), cpos);
+
+            control &= ~PCI_MSI_FLAGS_QSIZE;
+            multi_msi_enable(control, entry->msi.nvec);
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                             PCI_FUNC(pdev->devfn), cpos, control);
+
             msi_set_enable(pdev, 1);
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 1);
-
-        spin_unlock_irqrestore(&desc->lock, flags);
     }
 
     return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
         break;
 
     case MAP_PIRQ_TYPE_MSI:
+        if ( !msi->table_base )
+            msi->entry_nr = 1;
         irq = *index;
         if ( irq == -1 )
+    case MAP_PIRQ_TYPE_MULTI_MSI:
             irq = create_irq(NUMA_NO_NODE);
 
         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
                 goto done;
             }
         }
+        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+        {
+            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+                ret = -EDOM;
+            else if ( msi->entry_nr != 1 && !iommu_intremap )
+                ret = -EOPNOTSUPP;
+            else
+            {
+                while ( msi->entry_nr & (msi->entry_nr - 1) )
+                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+                pirq = get_free_pirqs(d, msi->entry_nr);
+                if ( pirq < 0 )
+                {
+                    while ( (msi->entry_nr >>= 1) > 1 )
+                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+                            break;
+                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+                            d->domain_id, msi->entry_nr << 1);
+                    ret = pirq;
+                }
+            }
+            if ( ret < 0 )
+                goto done;
+        }
         else
         {
             pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
  done:
     spin_unlock(&d->event_lock);
     spin_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
-        destroy_irq(irq);
+    if ( ret != 0 )
+        switch ( type )
+        {
+        case MAP_PIRQ_TYPE_MSI:
+            if ( *index == -1 )
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+                destroy_irq(irq);
+            break;
+        }
 free_domain:
     rcu_unlock_domain(d);
     return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&map, arg, 1) != 0 )
             break;
 
-        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+        switch ( map.type )
         {
+        case MAP_PIRQ_TYPE_MSI_SEG:
             map.type = MAP_PIRQ_TYPE_MSI;
             msi.seg = map.bus >> 16;
-        }
-        else
-        {
+            break;
+
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+            if ( map.table_base )
+                return -EINVAL;
+            msi.seg = map.bus >> 16;
+            break;
+
+        default:
             msi.seg = 0;
+            break;
         }
         msi.bus = map.bus;
         msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                                &msi);
 
+        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+            map.entry_nr = msi.entry_nr;
         if ( __copy_to_guest(arg, &map, 1) )
             ret = -EFAULT;
         break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
                     void *data);
 int unmap_domain_pirq(struct domain *d, int pirq);
 int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
 void free_domain_pirqs(struct domain *d);
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
 #define multi_msi_capable(control) \
     (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
-    control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+    control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
 #define is_64bit_address(control)   (!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)    (!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_GSI               0x1
 #define MAP_PIRQ_TYPE_UNKNOWN           0x2
 #define MAP_PIRQ_TYPE_MSI_SEG           0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
 
 #define PHYSDEVOP_map_pirq               13
 struct physdev_map_pirq {
     domid_t domid;
     /* IN */
     int type;
-    /* IN */
+    /* IN (ignored for ..._MULTI_MSI) */
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
     int bus;
     /* IN */
     int devfn;
-    /* IN */
+    /* IN (also OUT for ..._MULTI_MSI) */
     int entry_nr;
     /* IN */
     uint64_t table_base;
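[Two small pieces of arithmetic in this patch are worth spelling out:
physdev_map_pirq() rounds the requested vector count up to the next power of
two by repeatedly adding the lowest set bit, and the fixed multi_msi_enable()
writes log2 of the count into the Multiple Message Enable field
(PCI_MSI_FLAGS_QSIZE, bits 6:4 of the MSI control word, value as in the
standard PCI headers) instead of the old, incorrect num >> 1. A standalone
C sketch of both, with fls() open-coded rather than using Xen's helper:]

#include <assert.h>
#include <stdint.h>

/* Round n up to the next power of two by repeatedly adding the lowest
 * set bit, as physdev_map_pirq() does with entry_nr. */
static unsigned int round_up_pow2(unsigned int n)
{
    while ( n & (n - 1) )
        n += n & -n;
    return n;
}

/* "Find last set": 1-based index of the highest set bit (0 for 0). */
static int fls_(unsigned int x)
{
    int r = 0;

    while ( x )
    {
        x >>= 1;
        ++r;
    }
    return r;
}

#define PCI_MSI_FLAGS_QSIZE 0x70

/* The fixed multi_msi_enable(): MME must hold log2(num), not num / 2. */
static uint16_t encode_mme(uint16_t control, unsigned int num)
{
    control &= ~PCI_MSI_FLAGS_QSIZE;
    control |= ((fls_(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE;
    return control;
}

int main(void)
{
    assert(round_up_pow2(5) == 8);
    assert(round_up_pow2(8) == 8);
    /* 8 vectors -> MME = 3 (2^3 = 8); the old "num >> 1" form would
     * have written 4, i.e. asked the device for 16 vectors. */
    assert(encode_mme(0, 8) == (3 << 4));
    return 0;
}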
The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
 #define XEN_PCI_OP_aer_resume       (7)
 #define XEN_PCI_OP_aer_mmio         (8)
 #define XEN_PCI_OP_aer_slotreset    (9)
+#define XEN_PCI_OP_enable_multi_msi (10)
 
 /* xen_pci_op error numbers */
 #define XEN_PCI_ERR_success         (0)
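[For illustration, this is how a frontend might drive the new command. The
field semantics used here, the requested count in info and a possibly
workable smaller count returned in value on failure, are those described
above; the cut-down struct, the do_pci_op() stand-in, and the retry
convention are assumptions for the sketch, not anything this patch defines.]

#include <stdint.h>
#include <stdio.h>

#define XEN_PCI_OP_enable_multi_msi (10)

/* Cut-down mock of the relevant struct xen_pci_op fields; the real
 * structure lives in xen/include/public/io/pciif.h. */
struct xen_pci_op {
    uint32_t cmd;
    int32_t  err;    /* OUT: result of the operation */
    int32_t  value;  /* OUT: on failure, a smaller count that might work */
    uint32_t info;   /* IN: requested vector count */
};

/* Stand-in for the frontend's ring round-trip to the backend: here it
 * pretends the device only supports 4 vectors. */
static void do_pci_op(struct xen_pci_op *op)
{
    if ( op->cmd == XEN_PCI_OP_enable_multi_msi && op->info > 4 )
    {
        op->err = -1;   /* a XEN_PCI_ERR_* value in reality */
        op->value = 4;  /* hint: retry with 4 */
    }
    else
        op->err = 0;
}

int main(void)
{
    struct xen_pci_op op = {
        .cmd  = XEN_PCI_OP_enable_multi_msi,
        .info = 16,                 /* ask for 16 vectors */
    };

    do_pci_op(&op);
    if ( op.err && op.value )       /* negotiate downwards once */
    {
        op.info = op.value;
        op.err = op.value = 0;
        do_pci_op(&op);
    }
    printf("enabled %u vector(s), err=%d\n", op.info, op.err);
    return 0;
}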
Andrew Cooper
2013-Jul-16 11:15 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
On 16/07/13 11:13, Jan Beulich wrote:
> The main change being to make alloc_remap_entry() capable of allocating
> a block of entries.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>  }
>
>  /*
> - * Look for a free intr remap entry.
> + * Look for a free intr remap entry (or a contiguous set thereof).
>   * Need hold iremap_lock, and setup returned entry before releasing lock.
>   */
> -static int alloc_remap_entry(struct iommu *iommu)
> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)

alloc_remap_entries() now that it unconditionally takes a count (and you
already have to patch all callsites)

[...]

> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>      struct iremap_entry new_ire;
>      struct msi_msg_remap_entry *remap_rte;
> -    int index;
> +    unsigned int index, i, nr = 1;

Does this hardcoding of nr=1 defeat the purpose of the following logic?
~Andrew
Andrew Cooper
2013-Jul-16 11:19 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/13 11:15, Jan Beulich wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
>  #define XEN_PCI_OP_aer_resume       (7)
>  #define XEN_PCI_OP_aer_mmio         (8)
>  #define XEN_PCI_OP_aer_slotreset    (9)
> +#define XEN_PCI_OP_enable_multi_msi (10)

/* Be sure to bump this number if you change this file */
#define XEN_PCI_MAGIC "7"

Should you bump this version, or is the comment stale? The only in-tree
consumer I can find is MiniOS's pcifront, which writes it into xenstore.

~Andrew

>
>  /* xen_pci_op error numbers */
>  #define XEN_PCI_ERR_success         (0)
Jan Beulich
2013-Jul-16 11:32 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:13, Jan Beulich wrote:
>> The main change being to make alloc_remap_entry() capable of allocating
>> a block of entries.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/drivers/passthrough/vtd/intremap.c
>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>>  }
>>
>>  /*
>> - * Look for a free intr remap entry.
>> + * Look for a free intr remap entry (or a contiguous set thereof).
>>   * Need hold iremap_lock, and setup returned entry before releasing lock.
>>   */
>> -static int alloc_remap_entry(struct iommu *iommu)
>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>
> alloc_remap_entries() now that it unconditionally takes a count (and you
> already have to patch all callsites)

Actually I checked with Linux, and they use the singular in the function
name too (albeit the name isn't identical).

>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>>      struct iremap_entry new_ire;
>>      struct msi_msg_remap_entry *remap_rte;
>> -    int index;
>> +    unsigned int index, i, nr = 1;
>
> Does this hardcoding of nr=1 defeat the purpose of the following logic?

In what way?

>>      unsigned long flags;
>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>
>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>> +        nr = msi_desc->msi.nvec;

The logic here makes the vector count 1 for MSI-X and msi.nvec
for MSI.

Jan
Jan Beulich
2013-Jul-16 11:35 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:15, Jan Beulich wrote:
>> The requested vector count is to be passed in struct xen_pci_op's info
>> field. Upon failure, if a smaller vector count might work, the backend
>> will pass that smaller count in the value field (which so far is always
>> being set to zero in the error path).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/include/public/io/pciif.h
>> +++ b/xen/include/public/io/pciif.h
>> @@ -46,6 +46,7 @@
>>  #define XEN_PCI_OP_aer_resume       (7)
>>  #define XEN_PCI_OP_aer_mmio         (8)
>>  #define XEN_PCI_OP_aer_slotreset    (9)
>> +#define XEN_PCI_OP_enable_multi_msi (10)
>
> /* Be sure to bump this number if you change this file */
> #define XEN_PCI_MAGIC "7"
>
> Should you bump this version, or is the comment stale? The only in-tree
> consumer I can find is MiniOS's pcifront, which writes it into xenstore.

Whether it's stale I don't know (likely it is considering that you
found just a single consumer), but bumping a revision just
because of the (backwards compatible) addition seems
superfluous to me. This would be different if I changed the
existing enable_msi...

Jan
On 16/07/13 11:14, Jan Beulich wrote:
> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
>      return -ENOSPC;
>  }
>
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> +    unsigned int i, found = 0;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> +        if ( is_free_pirq(d, pirq_info(d, i)) )
> +        {
> +            pirq_get_info(d, i);
> +            if ( ++found == nr )
> +                return i;
> +        }
> +        else
> +            found = 0;
> +
> +    return -ENOSPC;
> +}
> +

Is there any reason why this loop is backwards? Unless I am mistaken,
guests can choose their own pirqs when binding them, reducing the
likelihood that the top of the available space will be free.

[...]

> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>   done:
>      spin_unlock(&d->event_lock);
>      spin_unlock(&pcidevs_lock);
> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> -        destroy_irq(irq);
> +    if ( ret != 0 )
> +        switch ( type )
> +        {
> +        case MAP_PIRQ_TYPE_MSI:
> +            if ( *index == -1 )
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +                destroy_irq(irq);
> +            break;
> +        }

Do we not need to create and destroy entry_nr irqs in this function, or
is a multi-vector MSI now considered as just a single irq?

I ask because this appears to lack the "repeating certain operations for
all involved IRQs" described in the comment.
~Andrew
>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:14, Jan Beulich wrote:
>> +int get_free_pirqs(struct domain *d, unsigned int nr)
>> +{
>> +    unsigned int i, found = 0;
>> +
>> +    ASSERT(spin_is_locked(&d->event_lock));
>> +
>> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
>> +        if ( is_free_pirq(d, pirq_info(d, i)) )
>> +        {
>> +            pirq_get_info(d, i);
>> +            if ( ++found == nr )
>> +                return i;
>> +        }
>> +        else
>> +            found = 0;
>> +
>> +    return -ENOSPC;
>> +}
>> +
>
> Is there any reason why this loop is backwards? Unless I am mistaken,
> guests can choose their own pirqs when binding them, reducing the
> likelihood that the top of the available space will be free.

This just follows the behavior of get_free_pirq(). I'm not up to
having the two behave differently.

>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>>   done:
>>      spin_unlock(&d->event_lock);
>>      spin_unlock(&pcidevs_lock);
>> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
>> -        destroy_irq(irq);
>> +    if ( ret != 0 )
>> +        switch ( type )
>> +        {
>> +        case MAP_PIRQ_TYPE_MSI:
>> +            if ( *index == -1 )
>> +        case MAP_PIRQ_TYPE_MULTI_MSI:
>> +                destroy_irq(irq);
>> +            break;
>> +        }
>
> Do we not need to create and destroy entry_nr irqs in this function, or
> is a multi-vector MSI now considered as just a single irq?
>
> I ask because this appears to lack the "repeating certain operations for
> all involved IRQs" described in the comment.

No, there's a single create_irq() in the function, and having a single
destroy_irq() here matches this. The remaining ones (both!) are in
map_domain_pirq().

Also, as a general remark, asking for changes in a series that was
posted 2.5 months ago (and deferred just because of the 4.3
release process) seems a little strange to me. I had to repost
merely to collect ack-s, and didn't really expect further requests
for adjustments as there was ample time before to do so.

Jan
Andrew Cooper
2013-Jul-17 09:02 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/13 12:35, Jan Beulich wrote:
>>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:15, Jan Beulich wrote:
>>> The requested vector count is to be passed in struct xen_pci_op's info
>>> field. Upon failure, if a smaller vector count might work, the backend
>>> will pass that smaller count in the value field (which so far is always
>>> being set to zero in the error path).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/include/public/io/pciif.h
>>> +++ b/xen/include/public/io/pciif.h
>>> @@ -46,6 +46,7 @@
>>>  #define XEN_PCI_OP_aer_resume       (7)
>>>  #define XEN_PCI_OP_aer_mmio         (8)
>>>  #define XEN_PCI_OP_aer_slotreset    (9)
>>> +#define XEN_PCI_OP_enable_multi_msi (10)
>> /* Be sure to bump this number if you change this file */
>> #define XEN_PCI_MAGIC "7"
>>
>> Should you bump this version, or is the comment stale? The only in-tree
>> consumer I can find is MiniOS's pcifront, which writes it into xenstore.
> Whether it's stale I don't know (likely it is considering that you
> found just a single consumer), but bumping a revision just
> because of the (backwards compatible) addition seems
> superfluous to me. This would be different if I changed the
> existing enable_msi...
>
> Jan

I suspected that was the answer, but just wanted to check that it hadn't
been overlooked.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Andrew Cooper
2013-Jul-17 09:50 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
On 16/07/13 12:32, Jan Beulich wrote:
>>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:13, Jan Beulich wrote:
>>> The main change being to make alloc_remap_entry() capable of allocating
>>> a block of entries.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/drivers/passthrough/vtd/intremap.c
>>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>>>  }
>>>
>>>  /*
>>> - * Look for a free intr remap entry.
>>> + * Look for a free intr remap entry (or a contiguous set thereof).
>>>   * Need hold iremap_lock, and setup returned entry before releasing lock.
>>>   */
>>> -static int alloc_remap_entry(struct iommu *iommu)
>>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>> alloc_remap_entries() now that it unconditionally takes a count (and you
>> already have to patch all callsites)
> Actually I checked with Linux, and they use the singular in the function
> name too (albeit the name isn't identical).
>
>>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>>>      struct iremap_entry new_ire;
>>>      struct msi_msg_remap_entry *remap_rte;
>>> -    int index;
>>> +    unsigned int index, i, nr = 1;
>> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> In what way?
>
>>>      unsigned long flags;
>>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>>
>>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
>>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>>> +        nr = msi_desc->msi.nvec;
> The logic here makes the vector count 1 for MSI-X and msi.nvec
> for MSI.
>
> Jan
>

Ah yes - I see now.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
On 16/07/13 12:48, Jan Beulich wrote:
>>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:14, Jan Beulich wrote:
>>> +int get_free_pirqs(struct domain *d, unsigned int nr)
>>> +{
>>> +    unsigned int i, found = 0;
>>> +
>>> +    ASSERT(spin_is_locked(&d->event_lock));
>>> +
>>> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
>>> +        if ( is_free_pirq(d, pirq_info(d, i)) )
>>> +        {
>>> +            pirq_get_info(d, i);
>>> +            if ( ++found == nr )
>>> +                return i;
>>> +        }
>>> +        else
>>> +            found = 0;
>>> +
>>> +    return -ENOSPC;
>>> +}
>>> +
>> Is there any reason why this loop is backwards? Unless I am mistaken,
>> guests can choose their own pirqs when binding them, reducing the
>> likelihood that the top of the available space will be free.
> This just follows the behavior of get_free_pirq(). I'm not up to
> having the two behave differently.
>
>>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>>>  done:
>>>      spin_unlock(&d->event_lock);
>>>      spin_unlock(&pcidevs_lock);
>>> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
>>> -        destroy_irq(irq);
>>> +    if ( ret != 0 )
>>> +        switch ( type )
>>> +        {
>>> +        case MAP_PIRQ_TYPE_MSI:
>>> +            if ( *index == -1 )
>>> +        case MAP_PIRQ_TYPE_MULTI_MSI:
>>> +                destroy_irq(irq);
>>> +            break;
>>> +        }
>> Do we not need to create and destroy entry_nr irqs in this function, or
>> is a multi-vector MSI now considered as just a single irq?
>>
>> I ask because this appears to lack the "repeating certain operations for
>> all involved IRQs" described in the comment.
> No, there's a single create_irq() in the function, and having a single
> destroy_irq() here matches this. The remaining ones (both!) are in
> map_domain_pirq().

Ok.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

>
> Also, as a general remark, asking for changes in a series that was
> posted 2.5 months ago (and deferred just because of the 4.3
> release process) seems a little strange to me. I had to repost
> merely to collect ack-s, and didn't really expect further requests
> for adjustments as there was ample time before to do so.
>
> Jan
>

2.5 months ago, I was very busy with XSAs, hotfixes and a release of
XenServer, so I apologise for not reviewing back then.

~Andrew
Zhang, Xiantao
2013-Jul-18 10:48 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
Thanks,
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>

> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Wednesday, July 17, 2013 5:50 PM
> To: Jan Beulich
> Cc: Zhang, Xiantao; xen-devel; Keir Fraser
> Subject: Re: [Xen-devel] [PATCH resend 1/3] VT-d: enable for multi-vector
> MSI
>
> On 16/07/13 12:32, Jan Beulich wrote:
> >>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com>
> wrote:
> >> On 16/07/13 11:13, Jan Beulich wrote:
> >>> The main change being to make alloc_remap_entry() capable of
> allocating
> >>> a block of entries.
> >>>
> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>
> >>> --- a/xen/drivers/passthrough/vtd/intremap.c
> >>> +++ b/xen/drivers/passthrough/vtd/intremap.c
> >>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
> >>>  }
> >>>
> >>>  /*
> >>> - * Look for a free intr remap entry.
> >>> + * Look for a free intr remap entry (or a contiguous set thereof).
> >>>   * Need hold iremap_lock, and setup returned entry before releasing
> lock.
> >>>   */
> >>> -static int alloc_remap_entry(struct iommu *iommu)
> >>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned
> int nr)
> >> alloc_remap_entries() now that it unconditionally takes a count (and you
> >> already have to patch all callsites)
> > Actually I checked with Linux, and they use the singular in the function
> > name too (albeit the name isn't identical).
> >
> >>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
> >>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> >>>      struct iremap_entry new_ire;
> >>>      struct msi_msg_remap_entry *remap_rte;
> >>> -    int index;
> >>> +    unsigned int index, i, nr = 1;
> >> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> > In what way?
> >
> >>>      unsigned long flags;
> >>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> >>>
> >>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
> >>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> >>> +        nr = msi_desc->msi.nvec;
> > The logic here makes the vector count 1 for MSI-X and msi.nvec
> > for MSI.
> >
> > Jan
> >
>
> Ah yes - I see now.
>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Ping (public interface change)?

>>> On 16.07.13 at 12:14, "Jan Beulich" <JBeulich@suse.com> wrote:
> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
>      return -ENOSPC;
>  }
>
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> +    unsigned int i, found = 0;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> +        if ( is_free_pirq(d, pirq_info(d, i)) )
> +        {
> +            pirq_get_info(d, i);
> +            if ( ++found == nr )
> +                return i;
> +        }
> +        else
> +            found = 0;
> +
> +    return -ENOSPC;
> +}
> +
>  int map_domain_pirq(
>      struct domain *d, int pirq, int irq, int type, void *data)
>  {
> @@ -1918,11 +1937,12 @@ int map_domain_pirq(
>
>      desc = irq_to_desc(irq);
>
> -    if ( type == MAP_PIRQ_TYPE_MSI )
> +    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
>      {
>          struct msi_info *msi = (struct msi_info *)data;
>          struct msi_desc *msi_desc;
>          struct pci_dev *pdev;
> +        unsigned int nr = 0;
>
>          ASSERT(spin_is_locked(&pcidevs_lock));
>
> @@ -1933,7 +1953,14 @@ int map_domain_pirq(
>          pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
>          ret = pci_enable_msi(msi, &msi_desc);
>          if ( ret )
> +        {
> +            if ( ret > 0 )
> +            {
> +                msi->entry_nr = ret;
> +                ret = -ENFILE;
> +            }
>              goto done;
> +        }
>
>          spin_lock_irqsave(&desc->lock, flags);
>
> @@ -1947,25 +1974,73 @@ int map_domain_pirq(
>              goto done;
>          }
>
> -        ret = setup_msi_irq(desc, msi_desc);
> -        if ( ret )
> +        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
>          {
> +            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
> +                 !desc->arch.used_vectors )
> +            {
> +                desc->arch.used_vectors = &pdev->arch.used_vectors;
> +                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> +                {
> +                    int vector = desc->arch.vector;
> +
> +                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
> +                    set_bit(vector, desc->arch.used_vectors);
> +                }
> +            }
> +            if ( type == MAP_PIRQ_TYPE_MSI ||
> +                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
> +                 ++nr == msi->entry_nr )
> +                break;
> +
> +            set_domain_irq_pirq(d, irq, info);
>              spin_unlock_irqrestore(&desc->lock, flags);
> -            pci_disable_msi(msi_desc);
> -            goto done;
> +
> +            info = NULL;
> +            irq = create_irq(NUMA_NO_NODE);
> +            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr,
> +                                                     &info)
> +                           : irq;
> +            if ( ret )
> +                break;
> +            msi_desc[nr].irq = irq;
> +
> +            if ( irq_permit_access(d, irq) != 0 )
> +                printk(XENLOG_G_WARNING
> +                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
> +                       d->domain_id, irq, pirq);
> +
> +            desc = irq_to_desc(irq);
> +            spin_lock_irqsave(&desc->lock, flags);
> +
> +            if ( desc->handler != &no_irq_type )
> +            {
> +                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
> +                        d->domain_id, irq, pirq + nr,
> +                        desc->handler->typename);
> +                ret = -EBUSY;
> +                break;
> +            }
>          }
>
> -        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
> -             && !desc->arch.used_vectors )
> +        if ( ret )
>          {
> -            desc->arch.used_vectors = &pdev->arch.used_vectors;
> -            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> +            spin_unlock_irqrestore(&desc->lock, flags);
> +            while ( nr-- )
>              {
> -                int vector = desc->arch.vector;
> -                ASSERT(!test_bit(vector, desc->arch.used_vectors));
> -
> -                set_bit(vector, desc->arch.used_vectors);
> +                if ( irq >= 0 )
> +                {
> +                    if ( irq_deny_access(d, irq) )
> +                        printk(XENLOG_G_ERR
> +                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
> +                               d->domain_id, irq, pirq);
> +                    destroy_irq(irq);
> +                }
> +                if ( info )
> +                    cleanup_domain_irq_pirq(d, irq, info);
> +                info = pirq_info(d, pirq + nr);
> +                irq = info->arch.irq;
>              }
> +            pci_disable_msi(msi_desc);
> +            goto done;
>          }
>
>          set_domain_irq_pirq(d, irq, info);
> @@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
>  {
>      unsigned long flags;
>      struct irq_desc *desc;
> -    int irq, ret = 0;
> +    int irq, ret = 0, rc;
> +    unsigned int i, nr = 1;
>      bool_t forced_unbind;
>      struct pirq *info;
>      struct msi_desc *msi_desc = NULL;
> @@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
>
>      desc = irq_to_desc(irq);
>      msi_desc = desc->msi_desc;
> +    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> +    {
> +        if ( msi_desc->msi_attrib.entry_nr )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: trying to unmap secondary MSI pirq %d\n",
> +                   d->domain_id, pirq);
> +            ret = -EBUSY;
> +            goto done;
> +        }
> +        nr = msi_desc->msi.nvec;
> +    }
>
>      ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
>      if ( ret )
> @@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
>
>      spin_lock_irqsave(&desc->lock, flags);
>
> -    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
> -
> -    if ( !forced_unbind )
> -        clear_domain_irq_pirq(d, irq, info);
> -    else
> +    for ( i = 0; ; )
>      {
> -        info->arch.irq = -irq;
> -        radix_tree_replace_slot(
> -            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> -            radix_tree_int_to_ptr(-pirq));
> +        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
> +
> +        if ( !forced_unbind )
> +            clear_domain_irq_pirq(d, irq, info);
> +        else
> +        {
> +            info->arch.irq = -irq;
> +            radix_tree_replace_slot(
> +                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> +                radix_tree_int_to_ptr(-pirq));
> +        }
> +
> +        if ( msi_desc )
> +        {
> +            desc->handler = &no_irq_type;
> +            desc->msi_desc = NULL;
> +        }
> +
> +        if ( ++i == nr )
> +            break;
> +
> +        spin_unlock_irqrestore(&desc->lock, flags);
> +
> +        if ( !forced_unbind )
> +            cleanup_domain_irq_pirq(d, irq, info);
> +
> +        rc = irq_deny_access(d, irq);
> +        if ( rc )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> +                   d->domain_id, irq, pirq + i);
> +            ret = rc;
> +        }
> +
> +        do {
> +            info = pirq_info(d, pirq + i);
> +            if ( info && (irq = info->arch.irq) > 0 )
> +                break;
> +            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
> +                   d->domain_id, pirq + i);
> +        } while ( ++i < nr );
> +
> +        if ( i == nr )
> +        {
> +            desc = NULL;
> +            break;
> +        }
> +
> +        desc = irq_to_desc(irq);
> +        BUG_ON(desc->msi_desc != msi_desc + i);
> +
> +        spin_lock_irqsave(&desc->lock, flags);
>      }
>
> -    if ( msi_desc )
> +    if ( desc )
>      {
> -        desc->handler = &no_irq_type;
> -        desc->msi_desc = NULL;
> +        spin_unlock_irqrestore(&desc->lock, flags);
> +
> +        if ( !forced_unbind )
> +            cleanup_domain_irq_pirq(d, irq, info);
> +
> +        rc = irq_deny_access(d, irq);
> +        if ( rc )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> +                   d->domain_id, irq, pirq + nr - 1);
> +            ret = rc;
> +        }
>      }
>
> -    spin_unlock_irqrestore(&desc->lock, flags);
>      if (msi_desc)
>          msi_free_irq(msi_desc);
>
> -    if ( !forced_unbind )
> -        cleanup_domain_irq_pirq(d, irq, info);
> -
> -    ret = irq_deny_access(d, irq);
> -    if ( ret )
> -        printk(XENLOG_G_ERR
> -               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> -               d->domain_id, irq, pirq);
> -
>  done:
>      return ret;
>  }
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
>      u8 bus = dev->bus;
>      u8 slot = PCI_SLOT(dev->devfn);
>      u8 func = PCI_FUNC(dev->devfn);
> +    int nr = entry->msi_attrib.entry_nr;
> +
> +    ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
> +    if ( nr )
> +        return 0;
>
>      pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
>                       msg->address_lo);
> @@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
>              u8 func = PCI_FUNC(entry->dev->devfn);
>
>              mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
> -            mask_bits &= ~(1);
> -            mask_bits |= flag;
> +            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
> +            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
>              pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
>          }
>          break;
> @@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
>      case PCI_CAP_ID_MSI:
>          if (!entry->dev || !entry->msi_attrib.maskbit)
>              break;
> -        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
> -                               PCI_SLOT(entry->dev->devfn),
> -                               PCI_FUNC(entry->dev->devfn),
> -                               entry->msi.mpos) & 1;
> +        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
> +                                PCI_SLOT(entry->dev->devfn),
> +                                PCI_FUNC(entry->dev->devfn),
> +                                entry->msi.mpos) >>
> +                entry->msi_attrib.entry_nr) & 1;
>      case PCI_CAP_ID_MSIX:
>          return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
>      }
> @@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
>      .set_affinity = set_msi_affinity
>  };
>
> -static struct msi_desc* alloc_msi_entry(void)
> +static struct msi_desc *alloc_msi_entry(unsigned int nr)
>  {
>      struct msi_desc *entry;
>
> -    entry = xmalloc(struct msi_desc);
> +    entry = xmalloc_array(struct msi_desc, nr);
>      if ( !entry )
>          return NULL;
>
>      INIT_LIST_HEAD(&entry->list);
> -    entry->dev = NULL;
> -    entry->remap_index = -1;
> +    while ( nr-- )
> +    {
> +        entry[nr].dev = NULL;
> +        entry[nr].remap_index = -1;
> +    }
>
>      return entry;
>  }
> @@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
>
>  int msi_free_irq(struct msi_desc *entry)
>  {
> -    destroy_irq(entry->irq);
> +    unsigned int nr = entry->msi.nvec;
> +
>      if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>      {
>          unsigned long start;
>          start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
>          msix_put_fixmap(entry->dev, virt_to_fix(start));
> +        nr = 1;
>      }
>
> -    /* Free the unused IRTE if intr remap enabled */
> -    if ( iommu_intremap )
> -        iommu_update_ire_from_msi(entry, NULL);
> +    while ( nr-- )
> +    {
> +        destroy_irq(entry[nr].irq);
> +
> +        /* Free the unused IRTE if intr remap enabled */
> +        if ( iommu_intremap )
> +            iommu_update_ire_from_msi(entry + nr, NULL);
> +    }
>
>      list_del(&entry->list);
>      xfree(entry);
> @@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
>   **/
>  static int msi_capability_init(struct pci_dev *dev,
>                                 int irq,
> -                               struct msi_desc **desc)
> +                               struct msi_desc **desc,
> +                               unsigned int nvec)
>  {
>      struct msi_desc *entry;
>      int pos;
> -    unsigned int maxvec, mpos;
> +    unsigned int i, maxvec, mpos;
>      u16 control, seg = dev->seg;
>      u8 bus = dev->bus;
>      u8 slot = PCI_SLOT(dev->devfn);
> @@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
>      pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
>      control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
>      maxvec = multi_msi_capable(control);
> +    if ( nvec > maxvec )
> +        return maxvec;
>      control &= ~PCI_MSI_FLAGS_QSIZE;
> +    multi_msi_enable(control, nvec);
>
>      /* MSI Entry Initialization */
>      msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
>
> -    entry = alloc_msi_entry();
> +    entry = alloc_msi_entry(nvec);
>      if ( !entry )
>          return -ENOMEM;
>
> -    entry->msi_attrib.type = PCI_CAP_ID_MSI;
> -    entry->msi_attrib.is_64 = is_64bit_address(control);
> -    entry->msi_attrib.entry_nr = 0;
> -    entry->msi_attrib.maskbit = is_mask_bit_support(control);
> -    entry->msi_attrib.masked = 1;
> -    entry->msi_attrib.pos = pos;
>      mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
> -    entry->msi.nvec = 1;
> +    for ( i = 0; i < nvec; ++i )
> +    {
> +        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
> +        entry[i].msi_attrib.is_64 = is_64bit_address(control);
> +        entry[i].msi_attrib.entry_nr = i;
> +        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
> +        entry[i].msi_attrib.masked = 1;
> +        entry[i].msi_attrib.pos = pos;
> +        if ( entry[i].msi_attrib.maskbit )
> +            entry[i].msi.mpos = mpos;
> +        entry[i].msi.nvec = 0;
> +        entry[i].dev = dev;
> +    }
> +    entry->msi.nvec = nvec;
>      entry->irq = irq;
> -    if ( is_mask_bit_support(control) )
> -        entry->msi.mpos = mpos;
> -    entry->dev = dev;
>      if ( entry->msi_attrib.maskbit )
>      {
>          u32 maskbits;
> @@ -693,7 +717,7 @@ static int msix_capability_init(struct p
>
>      if ( desc )
>      {
> -        entry = alloc_msi_entry();
> +        entry = alloc_msi_entry(1);
>          if ( !entry )
>              return -ENOMEM;
>          ASSERT(msi);
> @@ -851,7 +875,6 @@ static int msix_capability_init(struct p
>
>  static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
>  {
> -    int status;
>      struct pci_dev *pdev;
>      struct msi_desc *old_desc;
>
> @@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
>          pci_disable_msi(old_desc);
>      }
>
> -    status = msi_capability_init(pdev, msi->irq, desc);
> -    return status;
> +    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
>  }
>
>  static void __pci_disable_msi(struct msi_desc *entry)
> @@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
>
>      list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
>      {
> +        unsigned int i = 0, nr = 1;
> +
>          irq = entry->irq;
>          desc = &irq_desc[irq];
>
> @@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
>
>          if (desc->msi_desc != entry)
>          {
> +        bogus:
>              dprintk(XENLOG_ERR,
> -                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
> +                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
>                      pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> -                    PCI_FUNC(pdev->devfn));
> +                    PCI_FUNC(pdev->devfn), i);
>              spin_unlock_irqrestore(&desc->lock, flags);
>              return -EINVAL;
>          }
>
>          if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> +        {
>              msi_set_enable(pdev, 0);
> +            nr = entry->msi.nvec;
> +        }
>          else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>              msix_set_enable(pdev, 0);
>
>          msg = entry->msg;
>          write_msi_msg(entry, &msg);
>
> -        msi_set_mask_bit(desc, entry->msi_attrib.masked);
> +        for ( i = 0; ; )
> +        {
> +            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
> +            spin_unlock_irqrestore(&desc->lock, flags);
> +
> +            if ( !--nr )
> +                break;
> +
> +            desc = &irq_desc[entry[++i].irq];
> +            spin_lock_irqsave(&desc->lock, flags);
> +            if ( desc->msi_desc != entry + i )
> +                goto bogus;
> +        }
> +
> +        spin_unlock_irqrestore(&desc->lock, flags);
>
>          if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> +        {
> +            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
> +            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
> +                                          PCI_SLOT(pdev->devfn),
> +                                          PCI_FUNC(pdev->devfn), cpos);
> +
> +            control &= ~PCI_MSI_FLAGS_QSIZE;
> +            multi_msi_enable(control, entry->msi.nvec);
> +            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                             PCI_FUNC(pdev->devfn), cpos, control);
> +
>              msi_set_enable(pdev, 1);
> +        }
>          else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>              msix_set_enable(pdev, 1);
> -
> -        spin_unlock_irqrestore(&desc->lock, flags);
>      }
>
>      return 0;
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
>          break;
>
>      case MAP_PIRQ_TYPE_MSI:
> +        if ( !msi->table_base )
> +            msi->entry_nr = 1;
>          irq = *index;
>          if ( irq == -1 )
> +    case MAP_PIRQ_TYPE_MULTI_MSI:
>              irq = create_irq(NUMA_NO_NODE);
>
>          if ( irq < nr_irqs_gsi || irq >= nr_irqs )
> @@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
>                  goto done;
>              }
>          }
> +        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
> +        {
> +            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
> +                ret = -EDOM;
> +            else if ( msi->entry_nr != 1 && !iommu_intremap )
> +                ret = -EOPNOTSUPP;
> +            else
> +            {
> +                while ( msi->entry_nr & (msi->entry_nr - 1) )
> +                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
> +                pirq = get_free_pirqs(d, msi->entry_nr);
> +                if ( pirq < 0 )
> +                {
> +                    while ( (msi->entry_nr >>= 1) > 1 )
> +                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
> +                            break;
> +                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
> +                            d->domain_id, msi->entry_nr << 1);
> +                    ret = pirq;
> +                }
> +            }
> +            if ( ret < 0 )
> +                goto done;
> +        }
>          else
>          {
>              pirq = get_free_pirq(d, type);
> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>  done:
>      spin_unlock(&d->event_lock);
>      spin_unlock(&pcidevs_lock);
> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> -        destroy_irq(irq);
> +    if ( ret != 0 )
> +        switch ( type )
> +        {
> +        case MAP_PIRQ_TYPE_MSI:
> +            if ( *index == -1 )
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +                destroy_irq(irq);
> +            break;
> +        }
>  free_domain:
>      rcu_unlock_domain(d);
>      return ret;
> @@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>          if ( copy_from_guest(&map, arg, 1) != 0 )
>              break;
>
> -        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
> +        switch ( map.type )
>          {
> +        case MAP_PIRQ_TYPE_MSI_SEG:
>              map.type = MAP_PIRQ_TYPE_MSI;
>              msi.seg = map.bus >> 16;
> -        }
> -        else
> -        {
> +            break;
> +
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +            if ( map.table_base )
> +                return -EINVAL;
> +            msi.seg = map.bus >> 16;
> +            break;
> +
> +        default:
>             msi.seg = 0;
> +            break;
>         }
>         msi.bus = map.bus;
>         msi.devfn = map.devfn;
> @@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>         ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
>                                &msi);
>
> +        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
> +            map.entry_nr = msi.entry_nr;
>         if ( __copy_to_guest(arg, &map, 1) )
>             ret = -EFAULT;
>         break;
> --- a/xen/include/asm-x86/irq.h
> +++ b/xen/include/asm-x86/irq.h
> @@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
>                     void *data);
> int unmap_domain_pirq(struct domain *d, int pirq);
> int get_free_pirq(struct domain *d, int type);
> +int get_free_pirqs(struct domain *, unsigned int nr);
> void free_domain_pirqs(struct domain *d);
> int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
> int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
> #define multi_msi_capable(control) \
>     (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
> #define multi_msi_enable(control, num) \
> -    control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> +    control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> #define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
> #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
> #define msi_enable(control, num) multi_msi_enable(control, num); \
> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
> #define MAP_PIRQ_TYPE_GSI               0x1
> #define MAP_PIRQ_TYPE_UNKNOWN           0x2
> #define MAP_PIRQ_TYPE_MSI_SEG           0x3
> +#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
>
> #define PHYSDEVOP_map_pirq               13
> struct physdev_map_pirq {
>     domid_t domid;
>     /* IN */
>     int type;
> -    /* IN */
> +    /* IN (ignored for ..._MULTI_MSI) */
>     int index;
>     /* IN or OUT */
>     int pirq;
> -    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
> +    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
>     int bus;
>     /* IN */
>     int devfn;
> -    /* IN */
> +    /* IN (also OUT for ..._MULTI_MSI) */
>     int entry_nr;
>     /* IN */
>     uint64_t table_base;
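Two details of this patch reward a closer look. Per the PCI spec, plain MSI grants vector counts only in powers of two (1 to 32), encoded as log2 in the QSIZE bits of the message control word, and the function signals vector N by overwriting the low bits of the message data (see the ASSERT in write_msi_msg() above). That is why the pirq block must be contiguous and the count rounded up. A standalone sketch of both computations; this is illustrative only, since the patch open-codes the first on msi->entry_nr and uses fls() for the second:

#include <assert.h>

/* Round n up to the next power of two: while more than one bit is set,
 * add the lowest set bit (same trick as the loop in physdev_map_pirq()). */
static unsigned int round_up_pow2(unsigned int n)
{
    while ( n & (n - 1) )
        n += n & -n;
    return n;
}

/* The QSIZE field (bits 6:4 of the MSI control word) wants log2(num).
 * For power-of-two num, fls(num) - 1 == log2(num); the old macro's
 * "num >> 1" only happened to be right for num <= 4. */
static unsigned int qsize_bits(unsigned int num)
{
    unsigned int log2 = 0;

    while ( num >>= 1 )   /* open-coded fls(num) - 1 for num >= 1 */
        ++log2;
    return log2 << 4;
}

int main(void)
{
    assert(round_up_pow2(5) == 8);
    assert(round_up_pow2(16) == 16);
    assert(qsize_bits(8) == 0x30);     /* log2(8) == 3, in bits 6:4 */
    assert(((8 >> 1) << 4) != 0x30);   /* the bug the patch fixes */
    return 0;
}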
Jan Beulich
2013-Aug-05 13:12 UTC
Ping: [PATCH resend 3/3] pciif: add multi-vector-MSI command
Ping (I/O interface change, however trivial it might be)?

>>> On 16.07.13 at 12:15, "Jan Beulich" <JBeulich@suse.com> wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
>  #define XEN_PCI_OP_aer_resume           (7)
>  #define XEN_PCI_OP_aer_mmio             (8)
>  #define XEN_PCI_OP_aer_slotreset        (9)
> +#define XEN_PCI_OP_enable_multi_msi     (10)
>
>  /* xen_pci_op error numbers */
>  #define XEN_PCI_ERR_success             (0)
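A sketch of the negotiation a pcifront implementation might build on this: request a vector count via the op's info field and, on failure, retry with the hint the backend returns in the value field. The stripped-down struct shape and the do_pci_op() transport below are assumptions for illustration; the real struct xen_pci_op in io/pciif.h carries more fields, and only the cmd/info/value/err semantics come from the patch:

#include <stdint.h>

#define XEN_PCI_OP_enable_multi_msi 10       /* from the patch above */

struct xen_pci_op_stub {                     /* assumed minimal shape */
    uint32_t cmd;                            /* XEN_PCI_OP_* */
    int32_t  err;                            /* XEN_PCI_ERR_*, 0 == success */
    uint32_t value;                          /* OUT on failure: smaller count */
    uint32_t info;                           /* IN: requested vector count */
};

extern void do_pci_op(struct xen_pci_op_stub *op);  /* assumed transport */

static int negotiate_multi_msi(struct xen_pci_op_stub *op, unsigned int nvec)
{
    op->cmd = XEN_PCI_OP_enable_multi_msi;
    op->info = nvec;

    for ( ; ; )
    {
        do_pci_op(op);
        if ( op->err == 0 )                  /* XEN_PCI_ERR_success */
            return (int)op->info;            /* granted count */
        if ( op->value == 0 || op->value >= op->info )
            return -1;                       /* no smaller count will do */
        op->info = op->value;                /* retry with backend's hint */
    }
}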
On 16/07/2013 11:14, "Jan Beulich" <JBeulich@suse.com> wrote:

> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>
Keir Fraser
2013-Aug-08 09:02 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/2013 11:15, "Jan Beulich" <JBeulich@suse.com> wrote:

> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>