These are the patches from an earlier submitted series that haven't been
acked by anyone so far.

1: VT-d: enable for multi-vector MSI
2: x86: enable multi-vector MSI
3: pciif: add multi-vector-MSI command

Signed-off-by: Jan Beulich <jbeulich@suse.com>
The main change being to make alloc_remap_entry() capable of allocating
a block of entries.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
}
/*
- * Look for a free intr remap entry.
+ * Look for a free intr remap entry (or a contiguous set thereof).
* Need hold iremap_lock, and setup returned entry before releasing lock.
*/
-static int alloc_remap_entry(struct iommu *iommu)
+static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
{
struct iremap_entry *iremap_entries = NULL;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- int i;
+ unsigned int i, found;
ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );
- for ( i = 0; i < IREMAP_ENTRY_NR; i++ )
+ for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
{
struct iremap_entry *p;
if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
@@ -220,7 +220,9 @@ static int alloc_remap_entry(struct iomm
else
p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
-        if ( p->lo_val == 0 && p->hi_val == 0 ) /* a free entry */
+ if ( p->lo_val || p->hi_val ) /* not a free entry */
+ found = 0;
+ else if ( ++found == nr )
break;
}
@@ -228,7 +230,7 @@ static int alloc_remap_entry(struct iomm
unmap_vtd_domain_page(iremap_entries);
if ( i < IREMAP_ENTRY_NR )
- ir_ctrl->iremap_num++;
+ ir_ctrl->iremap_num += nr;
return i;
}
@@ -293,7 +295,7 @@ static int ioapic_rte_to_remap_entry(str
index = apic_pin_2_ir_idx[apic][ioapic_pin];
if ( index < 0 )
{
- index = alloc_remap_entry(iommu);
+ index = alloc_remap_entry(iommu, 1);
if ( index < IREMAP_ENTRY_NR )
apic_pin_2_ir_idx[apic][ioapic_pin] = index;
}
@@ -485,19 +487,18 @@ static void set_msi_source_id(struct pci
}
static int remap_entry_to_msi_msg(
- struct iommu *iommu, struct msi_msg *msg)
+ struct iommu *iommu, struct msi_msg *msg, unsigned int index)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct msi_msg_remap_entry *remap_rte;
- int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
remap_rte = (struct msi_msg_remap_entry *) msg;
- index = (remap_rte->address_lo.index_15 << 15) |
+ index += (remap_rte->address_lo.index_15 << 15) |
remap_rte->address_lo.index_0_14;
- if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
+ if ( index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: index (%d) for remap table is invalid !\n",
@@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct iremap_entry new_ire;
struct msi_msg_remap_entry *remap_rte;
- int index;
+ unsigned int index, i, nr = 1;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- remap_rte = (struct msi_msg_remap_entry *) msg;
+ if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ nr = msi_desc->msi.nvec;
+
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
if ( msg == NULL )
{
- /* Free specified unused IRTE */
- free_remap_entry(iommu, msi_desc->remap_index);
+ /* Free specified unused IRTEs */
+ for ( i = 0; i < nr; ++i )
+ free_remap_entry(iommu, msi_desc->remap_index + i);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return 0;
}
if ( msi_desc->remap_index < 0 )
{
- /*
- * TODO: Multiple-vector MSI requires allocating multiple continuous
- * entries and configuring addr/data of msi_msg in different way. So
- * alloca_remap_entry will be changed if enabling multiple-vector MSI
- * in future.
- */
- index = alloc_remap_entry(iommu);
- msi_desc->remap_index = index;
+ index = alloc_remap_entry(iommu, nr);
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = index + i;
}
else
index = msi_desc->remap_index;
@@ -590,7 +589,8 @@ static int msi_msg_to_remap_entry(
"%s: intremap index (%d) is larger than"
" the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
- msi_desc->remap_index = -1;
+ for ( i = 0; i < nr; ++i )
+ msi_desc[i].remap_index = -1;
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
@@ -626,14 +626,18 @@ static int msi_msg_to_remap_entry(
new_ire.lo.p = 1; /* finally, set present bit */
/* now construct new MSI/MSI-X rte entry */
+ remap_rte = (struct msi_msg_remap_entry *)msg;
remap_rte->address_lo.dontcare = 0;
- remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
- remap_rte->address_lo.index_0_14 = index & 0x7fff;
+ i = index;
+ if ( !nr )
+ i -= msi_desc->msi_attrib.entry_nr;
+ remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
+ remap_rte->address_lo.index_0_14 = i & 0x7fff;
remap_rte->address_lo.SHV = 1;
remap_rte->address_lo.format = 1;
remap_rte->address_hi = 0;
- remap_rte->data = 0;
+ remap_rte->data = index - i;
memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
iommu_flush_cache_entry(iremap_entry, sizeof(struct iremap_entry));
@@ -654,7 +658,9 @@ void msi_msg_read_remap_rte(
drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
: hpet_to_drhd(msi_desc->hpet_id);
if ( drhd )
- remap_entry_to_msi_msg(drhd->iommu, msg);
+ remap_entry_to_msi_msg(drhd->iommu, msg,
+ msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
+ ? msi_desc->msi_attrib.entry_nr : 0);
}
int msi_msg_write_remap_rte(
@@ -680,7 +686,7 @@ int __init intel_setup_hpet_msi(struct m
return 0;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- msi_desc->remap_index = alloc_remap_entry(iommu);
+ msi_desc->remap_index = alloc_remap_entry(iommu, 1);
if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
{
dprintk(XENLOG_ERR VTDPREFIX,
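The core of the change above is a first-fit scan for a contiguous run of free
IRTEs. A simplified standalone sketch of that scan, returning the start of the
run (illustrative names only; the real function's page-crossing table mapping
and iremap_num accounting are omitted):

#include <stdbool.h>

#define NR_ENTRIES 65536                  /* stand-in for IREMAP_ENTRY_NR */

/* First-fit search for nr consecutive free entries: any used entry
 * resets the run counter; once nr free entries in a row have been
 * seen, return the start of that run.  NR_ENTRIES signals failure,
 * in line with the out-of-range index alloc_remap_entry() returns. */
static unsigned int find_free_run(const bool *used, unsigned int nr)
{
    unsigned int i, found = 0;

    for ( i = 0; i < NR_ENTRIES; i++ )
    {
        if ( used[i] )             /* not a free entry */
            found = 0;
        else if ( ++found == nr )  /* i is the last entry of the run */
            return i - nr + 1;
    }

    return NR_ENTRIES;
}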
This implies
- extending the public interface to have a way to request a block of
MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs
Signed-off-by: Jan Beulich <jbeulich@suse.com>
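Some background on why contiguous blocks keep appearing in this series (IRTEs
in patch 1, pIRQs here): a multi-vector MSI device is programmed with a single
message, and generates vector n by putting n into the low log2(nvec) bits of
the configured data value. This is why the remap entries must be consecutive,
and why the guest is handed a contiguous pirq range it can index the same way.
A conceptual sketch of the convention (illustrative code, not part of the
patch):

#include <assert.h>

/* Message data a multi-vector MSI device emits for its n-th vector:
 * the single programmed base value with n filling the low bits.  The
 * base must therefore be aligned to the (power-of-two) vector count,
 * and whatever the data selects must form a contiguous block. */
static unsigned int msi_data_for_vector(unsigned int base_data,
                                        unsigned int n, unsigned int nvec)
{
    assert(nvec && !(nvec & (nvec - 1)));   /* power-of-two vector count */
    assert(!(base_data & (nvec - 1)));      /* base aligned to the block */
    assert(n < nvec);

    return base_data | n;
}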
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
return -ENOSPC;
}
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+ unsigned int i, found = 0;
+
+ ASSERT(spin_is_locked(&d->event_lock));
+
+ for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+ if ( is_free_pirq(d, pirq_info(d, i)) )
+ {
+ pirq_get_info(d, i);
+ if ( ++found == nr )
+ return i;
+ }
+ else
+ found = 0;
+
+ return -ENOSPC;
+}
+
int map_domain_pirq(
struct domain *d, int pirq, int irq, int type, void *data)
{
@@ -1918,11 +1937,12 @@ int map_domain_pirq(
desc = irq_to_desc(irq);
- if ( type == MAP_PIRQ_TYPE_MSI )
+ if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
{
struct msi_info *msi = (struct msi_info *)data;
struct msi_desc *msi_desc;
struct pci_dev *pdev;
+ unsigned int nr = 0;
ASSERT(spin_is_locked(&pcidevs_lock));
@@ -1933,7 +1953,14 @@ int map_domain_pirq(
pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
ret = pci_enable_msi(msi, &msi_desc);
if ( ret )
+ {
+ if ( ret > 0 )
+ {
+ msi->entry_nr = ret;
+ ret = -ENFILE;
+ }
goto done;
+ }
spin_lock_irqsave(&desc->lock, flags);
@@ -1947,25 +1974,73 @@ int map_domain_pirq(
goto done;
}
- ret = setup_msi_irq(desc, msi_desc);
- if ( ret )
+ while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
{
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+ !desc->arch.used_vectors )
+ {
+ desc->arch.used_vectors = &pdev->arch.used_vectors;
+ if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ {
+ int vector = desc->arch.vector;
+
+ ASSERT(!test_bit(vector, desc->arch.used_vectors));
+ set_bit(vector, desc->arch.used_vectors);
+ }
+ }
+ if ( type == MAP_PIRQ_TYPE_MSI ||
+ msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+ ++nr == msi->entry_nr )
+ break;
+
+ set_domain_irq_pirq(d, irq, info);
spin_unlock_irqrestore(&desc->lock, flags);
- pci_disable_msi(msi_desc);
- goto done;
+
+ info = NULL;
+ irq = create_irq(NUMA_NO_NODE);
+            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+                           : irq;
+ if ( ret )
+ break;
+ msi_desc[nr].irq = irq;
+
+ if ( irq_permit_access(d, irq) != 0 )
+ printk(XENLOG_G_WARNING
+                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+
+ desc = irq_to_desc(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+
+ if ( desc->handler != &no_irq_type )
+ {
+                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+                        d->domain_id, irq, pirq + nr, desc->handler->typename);
+ ret = -EBUSY;
+ break;
+ }
}
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
- && !desc->arch.used_vectors )
+ if ( ret )
{
- desc->arch.used_vectors = &pdev->arch.used_vectors;
- if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+ spin_unlock_irqrestore(&desc->lock, flags);
+ while ( nr-- )
{
- int vector = desc->arch.vector;
- ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
- set_bit(vector, desc->arch.used_vectors);
+ if ( irq >= 0 )
+ {
+ if ( irq_deny_access(d, irq) )
+ printk(XENLOG_G_ERR
+                           "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq);
+ destroy_irq(irq);
+ }
+ if ( info )
+ cleanup_domain_irq_pirq(d, irq, info);
+ info = pirq_info(d, pirq + nr);
+ irq = info->arch.irq;
}
+ pci_disable_msi(msi_desc);
+ goto done;
}
set_domain_irq_pirq(d, irq, info);
@@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
{
unsigned long flags;
struct irq_desc *desc;
- int irq, ret = 0;
+ int irq, ret = 0, rc;
+ unsigned int i, nr = 1;
bool_t forced_unbind;
struct pirq *info;
struct msi_desc *msi_desc = NULL;
@@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
desc = irq_to_desc(irq);
msi_desc = desc->msi_desc;
+ if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ if ( msi_desc->msi_attrib.entry_nr )
+ {
+ printk(XENLOG_G_ERR
+ "dom%d: trying to unmap secondary MSI pirq %d\n",
+ d->domain_id, pirq);
+ ret = -EBUSY;
+ goto done;
+ }
+ nr = msi_desc->msi.nvec;
+ }
ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
if ( ret )
@@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
spin_lock_irqsave(&desc->lock, flags);
- BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
- if ( !forced_unbind )
- clear_domain_irq_pirq(d, irq, info);
- else
+ for ( i = 0; ; )
{
- info->arch.irq = -irq;
- radix_tree_replace_slot(
- radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
- radix_tree_int_to_ptr(-pirq));
+ BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+ if ( !forced_unbind )
+ clear_domain_irq_pirq(d, irq, info);
+ else
+ {
+ info->arch.irq = -irq;
+ radix_tree_replace_slot(
+ radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+ radix_tree_int_to_ptr(-pirq));
+ }
+
+ if ( msi_desc )
+ {
+ desc->handler = &no_irq_type;
+ desc->msi_desc = NULL;
+ }
+
+ if ( ++i == nr )
+ break;
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + i);
+ ret = rc;
+ }
+
+ do {
+ info = pirq_info(d, pirq + i);
+ if ( info && (irq = info->arch.irq) > 0 )
+ break;
+ printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+ d->domain_id, pirq + i);
+ } while ( ++i < nr );
+
+ if ( i == nr )
+ {
+ desc = NULL;
+ break;
+ }
+
+ desc = irq_to_desc(irq);
+ BUG_ON(desc->msi_desc != msi_desc + i);
+
+ spin_lock_irqsave(&desc->lock, flags);
}
- if ( msi_desc )
+ if ( desc )
{
- desc->handler = &no_irq_type;
- desc->msi_desc = NULL;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !forced_unbind )
+ cleanup_domain_irq_pirq(d, irq, info);
+
+ rc = irq_deny_access(d, irq);
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+ d->domain_id, irq, pirq + nr - 1);
+ ret = rc;
+ }
}
- spin_unlock_irqrestore(&desc->lock, flags);
if (msi_desc)
msi_free_irq(msi_desc);
- if ( !forced_unbind )
- cleanup_domain_irq_pirq(d, irq, info);
-
- ret = irq_deny_access(d, irq);
- if ( ret )
- printk(XENLOG_G_ERR
- "dom%d: could not deny access to IRQ%d (pirq %d)\n",
- d->domain_id, irq, pirq);
-
done:
return ret;
}
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ int nr = entry->msi_attrib.entry_nr;
+
+ ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+ if ( nr )
+ return 0;
pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
msg->address_lo);
@@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
u8 func = PCI_FUNC(entry->dev->devfn);
mask_bits = pci_conf_read32(seg, bus, slot, func,
entry->msi.mpos);
- mask_bits &= ~(1);
- mask_bits |= flag;
+ mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+ mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
pci_conf_write32(seg, bus, slot, func, entry->msi.mpos,
mask_bits);
}
break;
@@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
case PCI_CAP_ID_MSI:
if (!entry->dev || !entry->msi_attrib.maskbit)
break;
- return pci_conf_read32(entry->dev->seg, entry->dev->bus,
- PCI_SLOT(entry->dev->devfn),
- PCI_FUNC(entry->dev->devfn),
- entry->msi.mpos) & 1;
+ return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+ PCI_SLOT(entry->dev->devfn),
+ PCI_FUNC(entry->dev->devfn),
+ entry->msi.mpos) >>
+ entry->msi_attrib.entry_nr) & 1;
case PCI_CAP_ID_MSIX:
return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
& 1;
}
@@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
.set_affinity = set_msi_affinity
};
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
{
struct msi_desc *entry;
- entry = xmalloc(struct msi_desc);
+ entry = xmalloc_array(struct msi_desc, nr);
if ( !entry )
return NULL;
INIT_LIST_HEAD(&entry->list);
- entry->dev = NULL;
- entry->remap_index = -1;
+ while ( nr-- )
+ {
+ entry[nr].dev = NULL;
+ entry[nr].remap_index = -1;
+ }
return entry;
}
@@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
int msi_free_irq(struct msi_desc *entry)
{
- destroy_irq(entry->irq);
+ unsigned int nr = entry->msi.nvec;
+
if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
unsigned long start;
start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
msix_put_fixmap(entry->dev, virt_to_fix(start));
+ nr = 1;
}
- /* Free the unused IRTE if intr remap enabled */
- if ( iommu_intremap )
- iommu_update_ire_from_msi(entry, NULL);
+ while ( nr-- )
+ {
+ destroy_irq(entry[nr].irq);
+
+ /* Free the unused IRTE if intr remap enabled */
+ if ( iommu_intremap )
+ iommu_update_ire_from_msi(entry + nr, NULL);
+ }
list_del(&entry->list);
xfree(entry);
@@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
**/
static int msi_capability_init(struct pci_dev *dev,
int irq,
- struct msi_desc **desc)
+ struct msi_desc **desc,
+ unsigned int nvec)
{
struct msi_desc *entry;
int pos;
- unsigned int maxvec, mpos;
+ unsigned int i, maxvec, mpos;
u16 control, seg = dev->seg;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
@@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
maxvec = multi_msi_capable(control);
+ if ( nvec > maxvec )
+ return maxvec;
control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, nvec);
/* MSI Entry Initialization */
msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(nvec);
if ( !entry )
return -ENOMEM;
- entry->msi_attrib.type = PCI_CAP_ID_MSI;
- entry->msi_attrib.is_64 = is_64bit_address(control);
- entry->msi_attrib.entry_nr = 0;
- entry->msi_attrib.maskbit = is_mask_bit_support(control);
- entry->msi_attrib.masked = 1;
- entry->msi_attrib.pos = pos;
mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
- entry->msi.nvec = 1;
+ for ( i = 0; i < nvec; ++i )
+ {
+ entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+ entry[i].msi_attrib.is_64 = is_64bit_address(control);
+ entry[i].msi_attrib.entry_nr = i;
+ entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+ entry[i].msi_attrib.masked = 1;
+ entry[i].msi_attrib.pos = pos;
+ if ( entry[i].msi_attrib.maskbit )
+ entry[i].msi.mpos = mpos;
+ entry[i].msi.nvec = 0;
+ entry[i].dev = dev;
+ }
+ entry->msi.nvec = nvec;
entry->irq = irq;
- if ( is_mask_bit_support(control) )
- entry->msi.mpos = mpos;
- entry->dev = dev;
if ( entry->msi_attrib.maskbit )
{
u32 maskbits;
@@ -693,7 +717,7 @@ static int msix_capability_init(struct p
if ( desc )
{
- entry = alloc_msi_entry();
+ entry = alloc_msi_entry(1);
if ( !entry )
return -ENOMEM;
ASSERT(msi);
@@ -851,7 +875,6 @@ static int msix_capability_init(struct p
static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
- int status;
struct pci_dev *pdev;
struct msi_desc *old_desc;
@@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
pci_disable_msi(old_desc);
}
- status = msi_capability_init(pdev, msi->irq, desc);
- return status;
+ return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
}
static void __pci_disable_msi(struct msi_desc *entry)
@@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
{
+ unsigned int i = 0, nr = 1;
+
irq = entry->irq;
desc = &irq_desc[irq];
@@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
if (desc->msi_desc != entry)
{
+ bogus:
dprintk(XENLOG_ERR,
-                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn));
+ PCI_FUNC(pdev->devfn), i);
spin_unlock_irqrestore(&desc->lock, flags);
return -EINVAL;
}
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
msi_set_enable(pdev, 0);
+ nr = entry->msi.nvec;
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 0);
msg = entry->msg;
write_msi_msg(entry, &msg);
- msi_set_mask_bit(desc, entry->msi_attrib.masked);
+ for ( i = 0; ; )
+ {
+ msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ if ( !--nr )
+ break;
+
+ desc = &irq_desc[entry[++i].irq];
+ spin_lock_irqsave(&desc->lock, flags);
+ if ( desc->msi_desc != entry + i )
+ goto bogus;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ {
+ unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+ u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos);
+
+ control &= ~PCI_MSI_FLAGS_QSIZE;
+ multi_msi_enable(control, entry->msi.nvec);
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn), cpos, control);
+
msi_set_enable(pdev, 1);
+ }
else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
msix_set_enable(pdev, 1);
-
- spin_unlock_irqrestore(&desc->lock, flags);
}
return 0;
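The per-vector masking changes in this file (msi_set_mask_bit() and
msi_get_mask_bit()) boil down to selecting bit entry_nr of the 32-bit mask
register instead of the formerly hard-coded bit 0. A standalone illustration
of the read-modify-write, with the config-space accessors left out:

#include <stdbool.h>
#include <stdint.h>

/* New value for the per-vector MSI mask register: bit n masks vector n,
 * so set or clear just the entry's own bit and preserve the rest.  In
 * the patch this value is read from and written back to PCI config
 * space at entry->msi.mpos. */
static uint32_t update_mask_bits(uint32_t mask_bits,
                                 unsigned int entry_nr, bool masked)
{
    mask_bits &= ~((uint32_t)1 << entry_nr);
    mask_bits |= (uint32_t)masked << entry_nr;
    return mask_bits;
}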
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
break;
case MAP_PIRQ_TYPE_MSI:
+ if ( !msi->table_base )
+ msi->entry_nr = 1;
irq = *index;
if ( irq == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
irq = create_irq(NUMA_NO_NODE);
if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
goto done;
}
}
+ else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+ {
+ if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+ ret = -EDOM;
+ else if ( msi->entry_nr != 1 && !iommu_intremap )
+ ret = -EOPNOTSUPP;
+ else
+ {
+ while ( msi->entry_nr & (msi->entry_nr - 1) )
+                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+ pirq = get_free_pirqs(d, msi->entry_nr);
+ if ( pirq < 0 )
+ {
+ while ( (msi->entry_nr >>= 1) > 1 )
+ if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+ break;
+                dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+ d->domain_id, msi->entry_nr << 1);
+ ret = pirq;
+ }
+ }
+ if ( ret < 0 )
+ goto done;
+ }
else
{
pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
done:
spin_unlock(&d->event_lock);
spin_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
- destroy_irq(irq);
+ if ( ret != 0 )
+ switch ( type )
+ {
+ case MAP_PIRQ_TYPE_MSI:
+ if ( *index == -1 )
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ destroy_irq(irq);
+ break;
+ }
free_domain:
rcu_unlock_domain(d);
return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( copy_from_guest(&map, arg, 1) != 0 )
break;
- if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+ switch ( map.type )
{
+ case MAP_PIRQ_TYPE_MSI_SEG:
map.type = MAP_PIRQ_TYPE_MSI;
msi.seg = map.bus >> 16;
- }
- else
- {
+ break;
+
+ case MAP_PIRQ_TYPE_MULTI_MSI:
+ if ( map.table_base )
+ return -EINVAL;
+ msi.seg = map.bus >> 16;
+ break;
+
+ default:
msi.seg = 0;
+ break;
}
msi.bus = map.bus;
msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
        ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                               &msi);
+ if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+ map.entry_nr = msi.entry_nr;
if ( __copy_to_guest(arg, &map, 1) )
ret = -EFAULT;
break;
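The rounding loop in the MAP_PIRQ_TYPE_MULTI_MSI branch earlier in this file's
changes deserves a note: repeatedly adding the lowest set bit (x & -x) ripples
carries upward until only a single bit remains, i.e. until x is the next power
of two, as MSI requires. A standalone check of that identity:

#include <assert.h>

/* Round x up to the next power of two by repeatedly adding its lowest
 * set bit; each addition carries into a higher bit, and the loop ends
 * once only one bit is left (x & (x - 1) == 0). */
static unsigned int round_up_pow2(unsigned int x)
{
    while ( x & (x - 1) )
        x += x & -x;
    return x;
}

int main(void)
{
    assert(round_up_pow2(1) == 1);
    assert(round_up_pow2(3) == 4);
    assert(round_up_pow2(5) == 8);
    assert(round_up_pow2(8) == 8);    /* powers of two are left alone */
    assert(round_up_pow2(17) == 32);
    return 0;
}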
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
void *data);
int unmap_domain_pirq(struct domain *d, int pirq);
int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
void free_domain_pirqs(struct domain *d);
int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
#define multi_msi_capable(control) \
(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
#define multi_msi_enable(control, num) \
- control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+ control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
#define is_64bit_address(control) (!!(control & PCI_MSI_FLAGS_64BIT))
#define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
#define msi_enable(control, num) multi_msi_enable(control, num); \
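The multi_msi_enable() fix is worth spelling out: PCI_MSI_FLAGS_QSIZE (bits
6:4 of the message control word) must hold log2 of the enabled vector count,
which is fls(num) - 1 for a power-of-two num. The old (num >> 1) << 4 happens
to match for num up to 4 but breaks down beyond that. A quick standalone
comparison (fls() is reimplemented locally to keep this compilable):

#include <stdio.h>

#define PCI_MSI_FLAGS_QSIZE 0x70  /* bits 6:4: log2 of enabled vectors */

static int fls(unsigned int x)    /* 1-based index of the highest set bit */
{
    int r = 0;

    while ( x )
    {
        ++r;
        x >>= 1;
    }
    return r;
}

int main(void)
{
    for ( unsigned int num = 1; num <= 32; num <<= 1 )
        printf("num=%2u old=%#04x new=%#04x\n", num,
               ((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE,    /* wrong for num >= 8 */
               ((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
    return 0;
}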
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
#define MAP_PIRQ_TYPE_GSI 0x1
#define MAP_PIRQ_TYPE_UNKNOWN 0x2
#define MAP_PIRQ_TYPE_MSI_SEG 0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI 0x4
#define PHYSDEVOP_map_pirq 13
struct physdev_map_pirq {
domid_t domid;
/* IN */
int type;
- /* IN */
+ /* IN (ignored for ..._MULTI_MSI) */
int index;
/* IN or OUT */
int pirq;
- /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+ /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
int bus;
/* IN */
int devfn;
- /* IN */
+ /* IN (also OUT for ..._MULTI_MSI) */
int entry_nr;
/* IN */
uint64_t table_base;
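To show how the new IN/OUT entry_nr is meant to be used, here is a hedged
guest-side sketch (types and constants are from the public headers touched
above; the hypercall_physdev_op() wrapper and the overall calling convention
are illustrative assumptions, not part of the patch):

#include <stdint.h>
#include <string.h>

/* Hypothetical request for a block of nvec MSI vectors.  entry_nr is
 * IN/OUT for MAP_PIRQ_TYPE_MULTI_MSI: on success it holds the
 * (possibly rounded-up) block size actually granted; on failure it
 * may hold a smaller count worth retrying with. */
static int map_multi_msi(uint16_t seg, uint8_t bus, uint8_t devfn,
                         int nvec, int *pirq)
{
    struct physdev_map_pirq map;
    int rc;

    memset(&map, 0, sizeof(map));
    map.domid = DOMID_SELF;
    map.type = MAP_PIRQ_TYPE_MULTI_MSI;
    map.index = -1;                    /* ignored for ..._MULTI_MSI */
    map.pirq = -1;
    map.bus = bus | ((int)seg << 16);  /* high 16 bits hold the segment */
    map.devfn = devfn;
    map.entry_nr = nvec;

    rc = hypercall_physdev_op(PHYSDEVOP_map_pirq, &map); /* assumed wrapper */
    if ( rc == 0 )
        *pirq = map.pirq;              /* first pirq of the block */
    return rc == 0 ? map.entry_nr : rc;
}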
The requested vector count is to be passed in struct xen_pci_op's info
field. Upon failure, if a smaller vector count might work, the backend
will pass that smaller count in the value field (which so far is always
being set to zero in the error path).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/public/io/pciif.h
+++ b/xen/include/public/io/pciif.h
@@ -46,6 +46,7 @@
 #define XEN_PCI_OP_aer_resume       (7)
 #define XEN_PCI_OP_aer_mmio         (8)
 #define XEN_PCI_OP_aer_slotreset    (9)
+#define XEN_PCI_OP_enable_multi_msi (10)

 /* xen_pci_op error numbers */
 #define XEN_PCI_ERR_success         (0)
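A hedged sketch of how a frontend might drive the new command, using the
info/value fields described above; the pcifront_do_op() transport helper is
an illustrative stand-in for the frontend's shared-ring plumbing:

/* Request nvec MSI vectors; on failure, report the smaller count the
 * backend suggests in op->value (if any) so the caller can retry. */
static int enable_multi_msi(struct xen_pci_op *op, unsigned int nvec)
{
    int err;

    op->cmd  = XEN_PCI_OP_enable_multi_msi;
    op->info = nvec;                /* requested vector count */

    err = pcifront_do_op(op);       /* assumed transport helper */
    if ( !err )
        return nvec;

    /* The value field used to be plain zero on error; now it may hold
     * a smaller vector count that stands a chance of succeeding. */
    if ( op->value > 0 && (unsigned int)op->value < nvec )
        return op->value;           /* caller may retry with this count */

    return err;
}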
Andrew Cooper
2013-Jul-16 11:15 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
On 16/07/13 11:13, Jan Beulich wrote:
> The main change being to make alloc_remap_entry() capable of allocating
> a block of entries.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
[...]
> -static int alloc_remap_entry(struct iommu *iommu)
> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)

alloc_remap_entries() now that it unconditionally takes a count (and you
already have to patch all callsites)

[...]
> -    int index;
> +    unsigned int index, i, nr = 1;

Does this hardcoding of nr=1 defeat the purpose of the following logic?

~Andrew
Andrew Cooper
2013-Jul-16 11:19 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/13 11:15, Jan Beulich wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
>  #define XEN_PCI_OP_aer_resume       (7)
>  #define XEN_PCI_OP_aer_mmio         (8)
>  #define XEN_PCI_OP_aer_slotreset    (9)
> +#define XEN_PCI_OP_enable_multi_msi (10)

/* Be sure to bump this number if you change this file */
#define XEN_PCI_MAGIC "7"

Should you bump this version, or is the comment stale? The only in-tree
consumer I can find is MiniOS's pcifront, which writes it into xenstore.

~Andrew
Jan Beulich
2013-Jul-16 11:32 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:13, Jan Beulich wrote:
>> -static int alloc_remap_entry(struct iommu *iommu)
>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>
> alloc_remap_entries() now that it unconditionally takes a count (and you
> already have to patch all callsites)

Actually I checked with Linux, and they use the singular in the function
name too (albeit the name isn't identical).

>> -    int index;
>> +    unsigned int index, i, nr = 1;
>
> Does this hardcoding of nr=1 defeat the purpose of the following logic?

In what way?

>>      unsigned long flags;
>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>
>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>> +        nr = msi_desc->msi.nvec;

The logic here makes the vector count 1 for MSI-X and msi.nvec for MSI.

Jan
Jan Beulich
2013-Jul-16 11:35 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:15, Jan Beulich wrote:
>> --- a/xen/include/public/io/pciif.h
>> +++ b/xen/include/public/io/pciif.h
>> @@ -46,6 +46,7 @@
>>  #define XEN_PCI_OP_aer_resume       (7)
>>  #define XEN_PCI_OP_aer_mmio         (8)
>>  #define XEN_PCI_OP_aer_slotreset    (9)
>> +#define XEN_PCI_OP_enable_multi_msi (10)
>
> /* Be sure to bump this number if you change this file */
> #define XEN_PCI_MAGIC "7"
>
> Should you bump this version, or is the comment stale? The only in-tree
> consumer I can find is MiniOS's pcifront, which writes it into xenstore.

Whether it's stale I don't know (likely it is, considering that you
found just a single consumer), but bumping a revision just because of
the (backwards compatible) addition seems superfluous to me. This would
be different if I changed the existing enable_msi...

Jan
On 16/07/13 11:14, Jan Beulich wrote:
> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
[...]
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> +    unsigned int i, found = 0;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> +        if ( is_free_pirq(d, pirq_info(d, i)) )
> +        {
> +            pirq_get_info(d, i);
> +            if ( ++found == nr )
> +                return i;
> +        }
> +        else
> +            found = 0;
> +
> +    return -ENOSPC;
> +}

Is there any reason why this loop is backwards? Unless I am mistaken,
guests can choose their own pirqs when binding them, reducing the
likelihood that the top of the available space will be free.

[...]
> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> -        destroy_irq(irq);
> +    if ( ret != 0 )
> +        switch ( type )
> +        {
> +        case MAP_PIRQ_TYPE_MSI:
> +            if ( *index == -1 )
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +                destroy_irq(irq);
> +            break;
> +        }

Do we not need to create and destroy entry_nr irqs in this function, or
is a multi-vector-msi now considered as just a single irq?

I ask because this appears to lack the "repeating certain operations for
all involved IRQs" described in the comment.

~Andrew
>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 16/07/13 11:14, Jan Beulich wrote:
>> +int get_free_pirqs(struct domain *d, unsigned int nr)
[...]
>
> Is there any reason why this loop is backwards? Unless I am mistaken,
> guests can choose their own pirqs when binding them, reducing the
> likelihood that the top of the available space will be free.

This just follows the behavior of get_free_pirq(). I'm not up to having
the two behave differently.

>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
[...]
>
> Do we not need to create and destroy entry_nr irqs in this function, or
> is a multi-vector-msi now considered as just a single irq?
>
> I ask because this appears to lack the "repeating certain operations for
> all involved IRQs" described in the comment.

No, there's a single create_irq() in the function, and having a single
destroy_irq() here matches this. The remaining ones (both!) are in
map_domain_pirq().

Also, as a general remark, asking for changes in a series that was
posted 2.5 months ago (and deferred just because of the 4.3 release
process) seems a little strange to me. I had to repost merely to collect
acks, and didn't really expect further requests for adjustments, as
there was ample time before to do so.

Jan
Andrew Cooper
2013-Jul-17 09:02 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/13 12:35, Jan Beulich wrote:
>>>> On 16.07.13 at 13:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> /* Be sure to bump this number if you change this file */
>> #define XEN_PCI_MAGIC "7"
>>
>> Should you bump this version, or is the comment stale? The only in-tree
>> consumer I can find is MiniOS's pcifront, which writes it into xenstore.
> Whether it's stale I don't know (likely it is, considering that you
> found just a single consumer), but bumping a revision just because of
> the (backwards compatible) addition seems superfluous to me. This would
> be different if I changed the existing enable_msi...
>
> Jan

I suspected that was the answer, but just wanted to check that it hadn't
been overlooked.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Andrew Cooper
2013-Jul-17 09:50 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
On 16/07/13 12:32, Jan Beulich wrote:
>>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:13, Jan Beulich wrote:
>>> The main change being to make alloc_remap_entry() capable of allocating
>>> a block of entries.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> --- a/xen/drivers/passthrough/vtd/intremap.c
>>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
>>>  }
>>>
>>>  /*
>>> - * Look for a free intr remap entry.
>>> + * Look for a free intr remap entry (or a contiguous set thereof).
>>>   * Need hold iremap_lock, and setup returned entry before releasing lock.
>>>   */
>>> -static int alloc_remap_entry(struct iommu *iommu)
>>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
>> alloc_remap_entries() now that it unconditionally takes a count (and you
>> already have to patch all callsites)
> Actually I checked with Linux, and they use the singular in the function
> name too (albeit the name isn't identical).
>
>>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
>>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>>>      struct iremap_entry new_ire;
>>>      struct msi_msg_remap_entry *remap_rte;
>>> -    int index;
>>> +    unsigned int index, i, nr = 1;
>> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> In what way?
>
>>>      unsigned long flags;
>>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>>>
>>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
>>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>>> +        nr = msi_desc->msi.nvec;
> The logic here makes the vector count 1 for MSI-X and msi.nvec
> for MSI.
>
> Jan
>

Ah yes - I see now.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
On 16/07/13 12:48, Jan Beulich wrote:
>>>> On 16.07.13 at 13:36, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 16/07/13 11:14, Jan Beulich wrote:
>>> +int get_free_pirqs(struct domain *d, unsigned int nr)
>>> +{
>>> +    unsigned int i, found = 0;
>>> +
>>> +    ASSERT(spin_is_locked(&d->event_lock));
>>> +
>>> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
>>> +        if ( is_free_pirq(d, pirq_info(d, i)) )
>>> +        {
>>> +            pirq_get_info(d, i);
>>> +            if ( ++found == nr )
>>> +                return i;
>>> +        }
>>> +        else
>>> +            found = 0;
>>> +
>>> +    return -ENOSPC;
>>> +}
>>> +
>> Is there any reason why this loop is backwards? Unless I am mistaken,
>> guests can choose their own pirqs when binding them, reducing the
>> likelihood that the top of the available space will be free.
> This just follows the behavior of get_free_pirq(). I'm not up to
> having the two behave differently.
>
>>> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>>>  done:
>>>      spin_unlock(&d->event_lock);
>>>      spin_unlock(&pcidevs_lock);
>>> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
>>> -        destroy_irq(irq);
>>> +    if ( ret != 0 )
>>> +        switch ( type )
>>> +        {
>>> +        case MAP_PIRQ_TYPE_MSI:
>>> +            if ( *index == -1 )
>>> +        case MAP_PIRQ_TYPE_MULTI_MSI:
>>> +                destroy_irq(irq);
>>> +            break;
>>> +        }
>> Do we not need to create and destroy entry_nr irqs in this function, or
>> is a multi-vector-msi now considered as just a single irq?
>>
>> I ask because this appears to lack the "repeating certain operations for
>> all involved IRQs" described in the comment.
> No, there's a single create_irq() in the function, and having a single
> destroy_irq() here matches this. The remaining ones (both!) are in
> map_domain_pirq().

Ok.

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

>
> Also, as a general remark, asking for changes in a series that was
> posted 2.5 months ago (and deferred just because of the 4.3
> release process) seems a little strange to me. I had to repost
> merely to collect ack-s, and didn't really expect further requests
> for adjustments as there was ample time before to do so.
>
> Jan
>

2.5 months ago, I was very busy with XSAs, hotfixes and a release of
XenServer, so I apologise for not reviewing back then.

~Andrew
Zhang, Xiantao
2013-Jul-18 10:48 UTC
Re: [PATCH resend 1/3] VT-d: enable for multi-vector MSI
Thanks,

Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>

> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Wednesday, July 17, 2013 5:50 PM
> To: Jan Beulich
> Cc: Zhang, Xiantao; xen-devel; Keir Fraser
> Subject: Re: [Xen-devel] [PATCH resend 1/3] VT-d: enable for multi-vector MSI
>
> On 16/07/13 12:32, Jan Beulich wrote:
> >>>> On 16.07.13 at 13:15, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> >> On 16/07/13 11:13, Jan Beulich wrote:
> >>> The main change being to make alloc_remap_entry() capable of allocating
> >>> a block of entries.
> >>>
> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >>>
> >>> --- a/xen/drivers/passthrough/vtd/intremap.c
> >>> +++ b/xen/drivers/passthrough/vtd/intremap.c
> >>> @@ -194,18 +194,18 @@ static void free_remap_entry(struct iomm
> >>>  }
> >>>
> >>>  /*
> >>> - * Look for a free intr remap entry.
> >>> + * Look for a free intr remap entry (or a contiguous set thereof).
> >>>   * Need hold iremap_lock, and setup returned entry before releasing lock.
> >>>   */
> >>> -static int alloc_remap_entry(struct iommu *iommu)
> >>> +static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
> >> alloc_remap_entries() now that it unconditionally takes a count (and you
> >> already have to patch all callsites)
> > Actually I checked with Linux, and they use the singular in the function
> > name too (albeit the name isn't identical).
> >
> >>> @@ -555,31 +556,29 @@ static int msi_msg_to_remap_entry(
> >>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> >>>      struct iremap_entry new_ire;
> >>>      struct msi_msg_remap_entry *remap_rte;
> >>> -    int index;
> >>> +    unsigned int index, i, nr = 1;
> >> Does this hardcoding of nr=1 defeat the purpose of the following logic?
> > In what way?
> >
> >>>      unsigned long flags;
> >>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> >>>
> >>> -    remap_rte = (struct msi_msg_remap_entry *) msg;
> >>> +    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> >>> +        nr = msi_desc->msi.nvec;
> > The logic here makes the vector count 1 for MSI-X and msi.nvec
> > for MSI.
> >
> > Jan
> >
>
> Ah yes - I see now.
>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Ping (public interface change)?

>>> On 16.07.13 at 12:14, "Jan Beulich" <JBeulich@suse.com> wrote:
> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int
>      return -ENOSPC;
>  }
>
> +int get_free_pirqs(struct domain *d, unsigned int nr)
> +{
> +    unsigned int i, found = 0;
> +
> +    ASSERT(spin_is_locked(&d->event_lock));
> +
> +    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
> +        if ( is_free_pirq(d, pirq_info(d, i)) )
> +        {
> +            pirq_get_info(d, i);
> +            if ( ++found == nr )
> +                return i;
> +        }
> +        else
> +            found = 0;
> +
> +    return -ENOSPC;
> +}
> +
>  int map_domain_pirq(
>      struct domain *d, int pirq, int irq, int type, void *data)
>  {
> @@ -1918,11 +1937,12 @@ int map_domain_pirq(
>
>      desc = irq_to_desc(irq);
>
> -    if ( type == MAP_PIRQ_TYPE_MSI )
> +    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
>      {
>          struct msi_info *msi = (struct msi_info *)data;
>          struct msi_desc *msi_desc;
>          struct pci_dev *pdev;
> +        unsigned int nr = 0;
>
>          ASSERT(spin_is_locked(&pcidevs_lock));
>
> @@ -1933,7 +1953,14 @@ int map_domain_pirq(
>          pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
>          ret = pci_enable_msi(msi, &msi_desc);
>          if ( ret )
> +        {
> +            if ( ret > 0 )
> +            {
> +                msi->entry_nr = ret;
> +                ret = -ENFILE;
> +            }
>              goto done;
> +        }
>
>          spin_lock_irqsave(&desc->lock, flags);
>
> @@ -1947,25 +1974,73 @@ int map_domain_pirq(
>              goto done;
>          }
>
> -        ret = setup_msi_irq(desc, msi_desc);
> -        if ( ret )
> +        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
>          {
> +            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
> +                 !desc->arch.used_vectors )
> +            {
> +                desc->arch.used_vectors = &pdev->arch.used_vectors;
> +                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> +                {
> +                    int vector = desc->arch.vector;
> +
> +                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
> +                    set_bit(vector, desc->arch.used_vectors);
> +                }
> +            }
> +            if ( type == MAP_PIRQ_TYPE_MSI ||
> +                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
> +                 ++nr == msi->entry_nr )
> +                break;
> +
> +            set_domain_irq_pirq(d, irq, info);
>              spin_unlock_irqrestore(&desc->lock, flags);
> -            pci_disable_msi(msi_desc);
> -            goto done;
> +
> +            info = NULL;
> +            irq = create_irq(NUMA_NO_NODE);
> +            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr,
> +                                                     &info)
> +                           : irq;
> +            if ( ret )
> +                break;
> +            msi_desc[nr].irq = irq;
> +
> +            if ( irq_permit_access(d, irq) != 0 )
> +                printk(XENLOG_G_WARNING
> +                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
> +                       d->domain_id, irq, pirq);
> +
> +            desc = irq_to_desc(irq);
> +            spin_lock_irqsave(&desc->lock, flags);
> +
> +            if ( desc->handler != &no_irq_type )
> +            {
> +                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
> +                        d->domain_id, irq, pirq + nr, desc->handler->typename);
> +                ret = -EBUSY;
> +                break;
> +            }
>          }
>
> -        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
> -             && !desc->arch.used_vectors )
> +        if ( ret )
>          {
> -            desc->arch.used_vectors = &pdev->arch.used_vectors;
> -            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
> +            spin_unlock_irqrestore(&desc->lock, flags);
> +            while ( nr-- )
>              {
> -                int vector = desc->arch.vector;
> -                ASSERT(!test_bit(vector, desc->arch.used_vectors));
> -
> -                set_bit(vector, desc->arch.used_vectors);
> +                if ( irq >= 0 )
> +                {
> +                    if ( irq_deny_access(d, irq) )
> +                        printk(XENLOG_G_ERR
> +                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
> +                               d->domain_id, irq, pirq);
> +                    destroy_irq(irq);
> +                }
> +                if ( info )
> +                    cleanup_domain_irq_pirq(d, irq, info);
> +                info = pirq_info(d, pirq + nr);
> +                irq = info->arch.irq;
>              }
> +            pci_disable_msi(msi_desc);
> +            goto done;
>          }
>
>          set_domain_irq_pirq(d, irq, info);
> @@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d,
>  {
>      unsigned long flags;
>      struct irq_desc *desc;
> -    int irq, ret = 0;
> +    int irq, ret = 0, rc;
> +    unsigned int i, nr = 1;
>      bool_t forced_unbind;
>      struct pirq *info;
>      struct msi_desc *msi_desc = NULL;
> @@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d,
>
>      desc = irq_to_desc(irq);
>      msi_desc = desc->msi_desc;
> +    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> +    {
> +        if ( msi_desc->msi_attrib.entry_nr )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: trying to unmap secondary MSI pirq %d\n",
> +                   d->domain_id, pirq);
> +            ret = -EBUSY;
> +            goto done;
> +        }
> +        nr = msi_desc->msi.nvec;
> +    }
>
>      ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
>      if ( ret )
> @@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d,
>
>      spin_lock_irqsave(&desc->lock, flags);
>
> -    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
> -
> -    if ( !forced_unbind )
> -        clear_domain_irq_pirq(d, irq, info);
> -    else
> +    for ( i = 0; ; )
>      {
> -        info->arch.irq = -irq;
> -        radix_tree_replace_slot(
> -            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> -            radix_tree_int_to_ptr(-pirq));
> +        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
> +
> +        if ( !forced_unbind )
> +            clear_domain_irq_pirq(d, irq, info);
> +        else
> +        {
> +            info->arch.irq = -irq;
> +            radix_tree_replace_slot(
> +                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
> +                radix_tree_int_to_ptr(-pirq));
> +        }
> +
> +        if ( msi_desc )
> +        {
> +            desc->handler = &no_irq_type;
> +            desc->msi_desc = NULL;
> +        }
> +
> +        if ( ++i == nr )
> +            break;
> +
> +        spin_unlock_irqrestore(&desc->lock, flags);
> +
> +        if ( !forced_unbind )
> +            cleanup_domain_irq_pirq(d, irq, info);
> +
> +        rc = irq_deny_access(d, irq);
> +        if ( rc )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> +                   d->domain_id, irq, pirq + i);
> +            ret = rc;
> +        }
> +
> +        do {
> +            info = pirq_info(d, pirq + i);
> +            if ( info && (irq = info->arch.irq) > 0 )
> +                break;
> +            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
> +                   d->domain_id, pirq + i);
> +        } while ( ++i < nr );
> +
> +        if ( i == nr )
> +        {
> +            desc = NULL;
> +            break;
> +        }
> +
> +        desc = irq_to_desc(irq);
> +        BUG_ON(desc->msi_desc != msi_desc + i);
> +
> +        spin_lock_irqsave(&desc->lock, flags);
>      }
>
> -    if ( msi_desc )
> +    if ( desc )
>      {
> -        desc->handler = &no_irq_type;
> -        desc->msi_desc = NULL;
> +        spin_unlock_irqrestore(&desc->lock, flags);
> +
> +        if ( !forced_unbind )
> +            cleanup_domain_irq_pirq(d, irq, info);
> +
> +        rc = irq_deny_access(d, irq);
> +        if ( rc )
> +        {
> +            printk(XENLOG_G_ERR
> +                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> +                   d->domain_id, irq, pirq + nr - 1);
> +            ret = rc;
> +        }
>      }
>
> -    spin_unlock_irqrestore(&desc->lock, flags);
>      if (msi_desc)
>          msi_free_irq(msi_desc);
>
> -    if ( !forced_unbind )
> -        cleanup_domain_irq_pirq(d, irq, info);
> -
> -    ret = irq_deny_access(d, irq);
> -    if ( ret )
> -        printk(XENLOG_G_ERR
> -               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
> -               d->domain_id, irq, pirq);
> -
>  done:
>      return ret;
>  }
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
>      u8 bus = dev->bus;
>      u8 slot = PCI_SLOT(dev->devfn);
>      u8 func = PCI_FUNC(dev->devfn);
> +    int nr = entry->msi_attrib.entry_nr;
> +
> +    ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
> +    if ( nr )
> +        return 0;
>
>      pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
>                       msg->address_lo);
> @@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
>              u8 func = PCI_FUNC(entry->dev->devfn);
>
>              mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
> -            mask_bits &= ~(1);
> -            mask_bits |= flag;
> +            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
> +            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
>              pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
>          }
>          break;
> @@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
>      case PCI_CAP_ID_MSI:
>          if (!entry->dev || !entry->msi_attrib.maskbit)
>              break;
> -        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
> -                               PCI_SLOT(entry->dev->devfn),
> -                               PCI_FUNC(entry->dev->devfn),
> -                               entry->msi.mpos) & 1;
> +        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
> +                                PCI_SLOT(entry->dev->devfn),
> +                                PCI_FUNC(entry->dev->devfn),
> +                                entry->msi.mpos) >>
> +                entry->msi_attrib.entry_nr) & 1;
>      case PCI_CAP_ID_MSIX:
>          return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
>      }
> @@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
>      .set_affinity = set_msi_affinity
>  };
>
> -static struct msi_desc* alloc_msi_entry(void)
> +static struct msi_desc *alloc_msi_entry(unsigned int nr)
>  {
>      struct msi_desc *entry;
>
> -    entry = xmalloc(struct msi_desc);
> +    entry = xmalloc_array(struct msi_desc, nr);
>      if ( !entry )
>          return NULL;
>
>      INIT_LIST_HEAD(&entry->list);
> -    entry->dev = NULL;
> -    entry->remap_index = -1;
> +    while ( nr-- )
> +    {
> +        entry[nr].dev = NULL;
> +        entry[nr].remap_index = -1;
> +    }
>
>      return entry;
>  }
> @@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
>
>  int msi_free_irq(struct msi_desc *entry)
>  {
> -    destroy_irq(entry->irq);
> +    unsigned int nr = entry->msi.nvec;
> +
>      if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>      {
>          unsigned long start;
>          start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
>          msix_put_fixmap(entry->dev, virt_to_fix(start));
> +        nr = 1;
>      }
>
> -    /* Free the unused IRTE if intr remap enabled */
> -    if ( iommu_intremap )
> -        iommu_update_ire_from_msi(entry, NULL);
> +    while ( nr-- )
> +    {
> +        destroy_irq(entry[nr].irq);
> +
> +        /* Free the unused IRTE if intr remap enabled */
> +        if ( iommu_intremap )
> +            iommu_update_ire_from_msi(entry + nr, NULL);
> +    }
>
>      list_del(&entry->list);
>      xfree(entry);
> @@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
>   **/
>  static int msi_capability_init(struct pci_dev *dev,
>                                 int irq,
> -                               struct msi_desc **desc)
> +                               struct msi_desc **desc,
> +                               unsigned int nvec)
>  {
>      struct msi_desc *entry;
>      int pos;
> -    unsigned int maxvec, mpos;
> +    unsigned int i, maxvec, mpos;
>      u16 control, seg = dev->seg;
>      u8 bus = dev->bus;
>      u8 slot = PCI_SLOT(dev->devfn);
> @@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
>      pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
>      control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
>      maxvec = multi_msi_capable(control);
> +    if ( nvec > maxvec )
> +        return maxvec;
>      control &= ~PCI_MSI_FLAGS_QSIZE;
> +    multi_msi_enable(control, nvec);
>
>      /* MSI Entry Initialization */
>      msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
>
> -    entry = alloc_msi_entry();
> +    entry = alloc_msi_entry(nvec);
>      if ( !entry )
>          return -ENOMEM;
>
> -    entry->msi_attrib.type = PCI_CAP_ID_MSI;
> -    entry->msi_attrib.is_64 = is_64bit_address(control);
> -    entry->msi_attrib.entry_nr = 0;
> -    entry->msi_attrib.maskbit = is_mask_bit_support(control);
> -    entry->msi_attrib.masked = 1;
> -    entry->msi_attrib.pos = pos;
>      mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
> -    entry->msi.nvec = 1;
> +    for ( i = 0; i < nvec; ++i )
> +    {
> +        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
> +        entry[i].msi_attrib.is_64 = is_64bit_address(control);
> +        entry[i].msi_attrib.entry_nr = i;
> +        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
> +        entry[i].msi_attrib.masked = 1;
> +        entry[i].msi_attrib.pos = pos;
> +        if ( entry[i].msi_attrib.maskbit )
> +            entry[i].msi.mpos = mpos;
> +        entry[i].msi.nvec = 0;
> +        entry[i].dev = dev;
> +    }
> +    entry->msi.nvec = nvec;
>      entry->irq = irq;
> -    if ( is_mask_bit_support(control) )
> -        entry->msi.mpos = mpos;
> -    entry->dev = dev;
>      if ( entry->msi_attrib.maskbit )
>      {
>          u32 maskbits;
> @@ -693,7 +717,7 @@ static int msix_capability_init(struct p
>
>      if ( desc )
>      {
> -        entry = alloc_msi_entry();
> +        entry = alloc_msi_entry(1);
>          if ( !entry )
>              return -ENOMEM;
>          ASSERT(msi);
> @@ -851,7 +875,6 @@ static int msix_capability_init(struct p
>
>  static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
>  {
> -    int status;
>      struct pci_dev *pdev;
>      struct msi_desc *old_desc;
>
> @@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
>          pci_disable_msi(old_desc);
>      }
>
> -    status = msi_capability_init(pdev, msi->irq, desc);
> -    return status;
> +    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
>  }
>
>  static void __pci_disable_msi(struct msi_desc *entry)
> @@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
>
>      list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
>      {
> +        unsigned int i = 0, nr = 1;
> +
>          irq = entry->irq;
>          desc = &irq_desc[irq];
>
> @@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
>
>          if (desc->msi_desc != entry)
>          {
> + bogus:
>              dprintk(XENLOG_ERR,
> -                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
> +                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
>                      pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> -                    PCI_FUNC(pdev->devfn));
> +                    PCI_FUNC(pdev->devfn), i);
>              spin_unlock_irqrestore(&desc->lock, flags);
>              return -EINVAL;
>          }
>
>          if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> +        {
>              msi_set_enable(pdev, 0);
> +            nr = entry->msi.nvec;
> +        }
>          else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>              msix_set_enable(pdev, 0);
>
>          msg = entry->msg;
>          write_msi_msg(entry, &msg);
>
> -        msi_set_mask_bit(desc, entry->msi_attrib.masked);
> +        for ( i = 0; ; )
> +        {
> +            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
> +            spin_unlock_irqrestore(&desc->lock, flags);
> +
> +            if ( !--nr )
> +                break;
> +
> +            desc = &irq_desc[entry[++i].irq];
> +            spin_lock_irqsave(&desc->lock, flags);
> +            if ( desc->msi_desc != entry + i )
> +                goto bogus;
> +        }
> +
> +        spin_unlock_irqrestore(&desc->lock, flags);
>
>          if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
> +        {
> +            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
> +            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
> +                                          PCI_SLOT(pdev->devfn),
> +                                          PCI_FUNC(pdev->devfn), cpos);
> +
> +            control &= ~PCI_MSI_FLAGS_QSIZE;
> +            multi_msi_enable(control, entry->msi.nvec);
> +            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
> +                             PCI_FUNC(pdev->devfn), cpos, control);
> +
>              msi_set_enable(pdev, 1);
> +        }
>          else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
>              msix_set_enable(pdev, 1);
> -
> -        spin_unlock_irqrestore(&desc->lock, flags);
>      }
>
>      return 0;
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int
>          break;
>
>      case MAP_PIRQ_TYPE_MSI:
> +        if ( !msi->table_base )
> +            msi->entry_nr = 1;
>          irq = *index;
>          if ( irq == -1 )
> +    case MAP_PIRQ_TYPE_MULTI_MSI:
>              irq = create_irq(NUMA_NO_NODE);
>
>          if ( irq < nr_irqs_gsi || irq >= nr_irqs )
> @@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int
>                  goto done;
>              }
>          }
> +        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
> +        {
> +            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
> +                ret = -EDOM;
> +            else if ( msi->entry_nr != 1 && !iommu_intremap )
> +                ret = -EOPNOTSUPP;
> +            else
> +            {
> +                while ( msi->entry_nr & (msi->entry_nr - 1) )
> +                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
> +                pirq = get_free_pirqs(d, msi->entry_nr);
> +                if ( pirq < 0 )
> +                {
> +                    while ( (msi->entry_nr >>= 1) > 1 )
> +                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
> +                            break;
> +                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
> +                            d->domain_id, msi->entry_nr << 1);
> +                    ret = pirq;
> +                }
> +            }
> +            if ( ret < 0 )
> +                goto done;
> +        }
>          else
>          {
>              pirq = get_free_pirq(d, type);
> @@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int
>  done:
>      spin_unlock(&d->event_lock);
>      spin_unlock(&pcidevs_lock);
> -    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
> -        destroy_irq(irq);
> +    if ( ret != 0 )
> +        switch ( type )
> +        {
> +        case MAP_PIRQ_TYPE_MSI:
> +            if ( *index == -1 )
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +                destroy_irq(irq);
> +            break;
> +        }
>  free_domain:
>      rcu_unlock_domain(d);
>      return ret;
> @@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>          if ( copy_from_guest(&map, arg, 1) != 0 )
>              break;
>
> -        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
> +        switch ( map.type )
>          {
> +        case MAP_PIRQ_TYPE_MSI_SEG:
>              map.type = MAP_PIRQ_TYPE_MSI;
>              msi.seg = map.bus >> 16;
> -        }
> -        else
> -        {
> +            break;
> +
> +        case MAP_PIRQ_TYPE_MULTI_MSI:
> +            if ( map.table_base )
> +                return -EINVAL;
> +            msi.seg = map.bus >> 16;
> +            break;
> +
> +        default:
>              msi.seg = 0;
> +            break;
>          }
>          msi.bus = map.bus;
>          msi.devfn = map.devfn;
> @@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
>          ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
>                                 &msi);
>
> +        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
> +            map.entry_nr = msi.entry_nr;
>          if ( __copy_to_guest(arg, &map, 1) )
>              ret = -EFAULT;
>          break;
> --- a/xen/include/asm-x86/irq.h
> +++ b/xen/include/asm-x86/irq.h
> @@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
>                      void *data);
>  int unmap_domain_pirq(struct domain *d, int pirq);
>  int get_free_pirq(struct domain *d, int type);
> +int get_free_pirqs(struct domain *, unsigned int nr);
>  void free_domain_pirqs(struct domain *d);
>  int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
>  int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
>  #define multi_msi_capable(control) \
>      (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
>  #define multi_msi_enable(control, num) \
> -    control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
> +    control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
>  #define is_64bit_address(control)   (!!(control & PCI_MSI_FLAGS_64BIT))
>  #define is_mask_bit_support(control) (!!(control & PCI_MSI_FLAGS_MASKBIT))
>  #define msi_enable(control, num) multi_msi_enable(control, num); \
> --- a/xen/include/public/physdev.h
> +++ b/xen/include/public/physdev.h
> @@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
>  #define MAP_PIRQ_TYPE_GSI               0x1
>  #define MAP_PIRQ_TYPE_UNKNOWN           0x2
>  #define MAP_PIRQ_TYPE_MSI_SEG           0x3
> +#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
>
>  #define PHYSDEVOP_map_pirq               13
>  struct physdev_map_pirq {
>      domid_t domid;
>      /* IN */
>      int type;
> -    /* IN */
> +    /* IN (ignored for ..._MULTI_MSI) */
>      int index;
>      /* IN or OUT */
>      int pirq;
> -    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
> +    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
>      int bus;
>      /* IN */
>      int devfn;
> -    /* IN */
> +    /* IN (also OUT for ..._MULTI_MSI) */
>      int entry_nr;
>      /* IN */
>      uint64_t table_base;
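To make the extended hypercall interface concrete, here is a hedged sketch
of a guest kernel requesting a four-vector block. HYPERVISOR_physdev_op()
is the usual guest-side wrapper; the exact field handling (in particular
whether a pirq may be pre-specified) is an assumption, not taken from this
series.

    struct physdev_map_pirq map = {
        .domid    = DOMID_SELF,
        .type     = MAP_PIRQ_TYPE_MULTI_MSI,
        .index    = -1,                  /* ignored for ..._MULTI_MSI */
        .pirq     = -1,                  /* let Xen pick the pirq block */
        .bus      = (seg << 16) | bus,   /* high 16 bits carry the segment */
        .devfn    = devfn,
        .entry_nr = 4,                   /* IN: vectors wanted */
        /* .table_base left zero -- non-zero yields -EINVAL here */
    };
    int rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map);

    /* On success map.pirq names the first pirq of a contiguous block and
     * map.entry_nr echoes the (power-of-two rounded) granted count; on
     * failure map.entry_nr may report a smaller count worth retrying. */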
Jan Beulich
2013-Aug-05 13:12 UTC
Ping: [PATCH resend 3/3] pciif: add multi-vector-MSI command
Ping (I/O interface change, however trivial it might be)?

>>> On 16.07.13 at 12:15, "Jan Beulich" <JBeulich@suse.com> wrote:
> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/include/public/io/pciif.h
> +++ b/xen/include/public/io/pciif.h
> @@ -46,6 +46,7 @@
>  #define XEN_PCI_OP_aer_resume           (7)
>  #define XEN_PCI_OP_aer_mmio             (8)
>  #define XEN_PCI_OP_aer_slotreset        (9)
> +#define XEN_PCI_OP_enable_multi_msi     (10)
>
>  /* xen_pci_op error numbers */
>  #define XEN_PCI_ERR_success             (0)
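As a sketch of how a frontend would use the new command: do_pci_op() stands
in for pcifront's request/response helper, and the surrounding driver
plumbing is elided -- treat this as illustrative, not as the reference
frontend implementation.

    struct xen_pci_op op = {
        .cmd    = XEN_PCI_OP_enable_multi_msi,
        .domain = pci_domain_nr(dev->bus),
        .bus    = dev->bus->number,
        .devfn  = dev->devfn,
        .info   = nvec,             /* IN: requested vector count */
    };
    int err = do_pci_op(pdev, &op);

    if ( err && op.value && op.value < nvec )
        nvec = op.value;            /* backend hints a smaller count may work */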
On 16/07/2013 11:14, "Jan Beulich" <JBeulich@suse.com> wrote:

> This implies
> - extending the public interface to have a way to request a block of
>   MSIs
> - allocating a block of contiguous pIRQ-s for the target domain (but
>   note that the Xen IRQs allocated have no need of being contiguous)
> - repeating certain operations for all involved IRQs
> - fixing multi_msi_enable()
> - adjusting the mask bit accesses for maskable MSIs
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>
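The multi_msi_enable() fix acked above is easy to sanity-check in
isolation: the PCI "Multiple Message Enable" field (bits 6:4 of the
message control word, i.e. PCI_MSI_FLAGS_QSIZE = 0x70) encodes log2 of the
vector count, which fls(num) - 1 yields for any power-of-two num, whereas
the old (num >> 1) << 4 expression only agreed for num <= 4. A standalone
demonstration follows; fls_() is a minimal stand-in for Xen's fls().

    #include <stdio.h>

    static int fls_(unsigned int x)   /* highest set bit position, 1-based */
    {
        int r = 0;

        while ( x )
        {
            ++r;
            x >>= 1;
        }
        return r;
    }

    int main(void)
    {
        unsigned int num;

        /* old and new encodings diverge from num = 8 upwards */
        for ( num = 1; num <= 32; num <<= 1 )
            printf("num=%2u  old=%#04x  new=%#04x\n", num,
                   ((num >> 1) << 4) & 0x70,
                   (unsigned int)((fls_(num) - 1) << 4) & 0x70);
        return 0;
    }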
Keir Fraser
2013-Aug-08 09:02 UTC
Re: [PATCH resend 3/3] pciif: add multi-vector-MSI command
On 16/07/2013 11:15, "Jan Beulich" <JBeulich@suse.com> wrote:

> The requested vector count is to be passed in struct xen_pci_op's info
> field. Upon failure, if a smaller vector count might work, the backend
> will pass that smaller count in the value field (which so far is always
> being set to zero in the error path).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>