Jan Beulich
2013-Apr-12 10:18 UTC
[PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
1: IOMMU: allow MSI message to IRTE propagation to fail
2: AMD IOMMU: allocate IRTE entries instead of using a static mapping
3: AMD IOMMU: untie remap and vector maps

See the individual patches for what, if anything, has changed from v2.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Apr-12 10:22 UTC
[PATCH v3 1/3] IOMMU: allow MSI message to IRTE propagation to fail
With the need to allocate multiple contiguous IRTEs for multi-vector MSI, the chance of failure here increases. While on the AMD side there''s no allocation of IRTEs at present at all (and hence no way for this allocation to fail, which is going to change with a later patch in this series), VT-d already ignores an eventual error here, which this patch fixes. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v3: Introduce _find_iommu_for_device() to take care of filtering out the case where an MSI is being set up for the IOMMU itself. --- a/xen/arch/x86/hpet.c +++ b/xen/arch/x86/hpet.c @@ -254,13 +254,22 @@ static void hpet_msi_mask(struct irq_des ch->msi.msi_attrib.masked = 1; } -static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg) +static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg) { ch->msi.msg = *msg; + if ( iommu_intremap ) - iommu_update_ire_from_msi(&ch->msi, msg); + { + int rc = iommu_update_ire_from_msi(&ch->msi, msg); + + if ( rc ) + return rc; + } + hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx)); hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4); + + return 0; } static void __maybe_unused @@ -318,12 +327,12 @@ static hw_irq_controller hpet_msi_type .set_affinity = hpet_msi_set_affinity, }; -static void __hpet_setup_msi_irq(struct irq_desc *desc) +static int __hpet_setup_msi_irq(struct irq_desc *desc) { struct msi_msg msg; msi_compose_msg(desc, &msg); - hpet_msi_write(desc->action->dev_id, &msg); + return hpet_msi_write(desc->action->dev_id, &msg); } static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch) @@ -347,6 +356,8 @@ static int __init hpet_setup_msi_irq(str desc->handler = &hpet_msi_type; ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch); + if ( ret >= 0 ) + ret = __hpet_setup_msi_irq(desc); if ( ret < 0 ) { if ( iommu_intremap ) @@ -354,7 +365,6 @@ static int __init hpet_setup_msi_irq(str return ret; } - __hpet_setup_msi_irq(desc); desc->msi_desc = &ch->msi; return 0; --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -1938,7 +1938,14 @@ int map_domain_pirq( if ( desc->handler != &no_irq_type ) dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n", d->domain_id, irq); - setup_msi_handler(desc, msi_desc); + + ret = setup_msi_irq(desc, msi_desc); + if ( ret ) + { + spin_unlock_irqrestore(&desc->lock, flags); + pci_disable_msi(msi_desc); + goto done; + } if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV && !desc->arch.used_vectors ) @@ -1954,7 +1961,6 @@ int map_domain_pirq( } set_domain_irq_pirq(d, irq, info); - setup_msi_irq(desc); spin_unlock_irqrestore(&desc->lock, flags); } else --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -214,14 +214,18 @@ static void read_msi_msg(struct msi_desc iommu_read_msi_from_ire(entry, msg); } -static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) +static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { entry->msg = *msg; if ( iommu_intremap ) { + int rc; + ASSERT(msg != &entry->msg); - iommu_update_ire_from_msi(entry, msg); + rc = iommu_update_ire_from_msi(entry, msg); + if ( rc ) + return rc; } switch ( entry->msi_attrib.type ) @@ -264,6 +268,8 @@ static void write_msi_msg(struct msi_des default: BUG(); } + + return 0; } void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask) @@ -464,19 +470,15 @@ static struct msi_desc* alloc_msi_entry( return entry; } -void setup_msi_handler(struct irq_desc *desc, struct msi_desc *msidesc) +int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc) { + 
struct msi_msg msg; + desc->msi_desc = msidesc; desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable : &pci_msi_nonmaskable; -} - -void setup_msi_irq(struct irq_desc *desc) -{ - struct msi_msg msg; - msi_compose_msg(desc, &msg); - write_msi_msg(desc->msi_desc, &msg); + return write_msi_msg(msidesc, &msg); } int msi_free_irq(struct msi_desc *entry) --- a/xen/drivers/passthrough/amd/iommu_intr.c +++ b/xen/drivers/passthrough/amd/iommu_intr.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <xen/err.h> #include <xen/sched.h> #include <xen/hvm/iommu.h> #include <asm/amd-iommu.h> @@ -359,25 +360,35 @@ done: } } -void amd_iommu_msi_msg_update_ire( +static struct amd_iommu *_find_iommu_for_device(int seg, int bdf) +{ + struct amd_iommu *iommu = find_iommu_for_device(seg, bdf); + + if ( iommu ) + return iommu; + + list_for_each_entry ( iommu, &amd_iommu_head, list ) + if ( iommu->seg == seg && iommu->bdf == bdf ) + return NULL; + + AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n", + seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf)); + return ERR_PTR(-EINVAL); +} + +int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { struct pci_dev *pdev = msi_desc->dev; int bdf, seg; struct amd_iommu *iommu; - if ( !iommu_intremap ) - return; - bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; seg = pdev ? pdev->seg : hpet_sbdf.seg; - iommu = find_iommu_for_device(seg, bdf); - if ( !iommu ) - { - AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = %#x\n", bdf); - return; - } + iommu = _find_iommu_for_device(seg, bdf); + if ( IS_ERR_OR_NULL(iommu) ) + return PTR_ERR(iommu); if ( msi_desc->remap_index >= 0 ) { @@ -395,7 +406,7 @@ void amd_iommu_msi_msg_update_ire( } if ( !msg ) - return; + return 0; do { update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index, @@ -404,6 +415,8 @@ void amd_iommu_msi_msg_update_ire( break; bdf += pdev->phantom_stride; } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); + + return 0; } void amd_iommu_read_msi_from_ire( --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -548,18 +548,20 @@ void iommu_update_ire_from_apic( const struct iommu_ops *ops = iommu_get_ops(); ops->update_ire_from_apic(apic, reg, value); } -void iommu_update_ire_from_msi( + +int iommu_update_ire_from_msi( struct msi_desc *msi_desc, struct msi_msg *msg) { const struct iommu_ops *ops = iommu_get_ops(); - ops->update_ire_from_msi(msi_desc, msg); + return iommu_intremap ? 
ops->update_ire_from_msi(msi_desc, msg) : 0; } void iommu_read_msi_from_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { const struct iommu_ops *ops = iommu_get_ops(); - ops->read_msi_from_ire(msi_desc, msg); + if ( iommu_intremap ) + ops->read_msi_from_ire(msi_desc, msg); } unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg) --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -90,7 +90,7 @@ void io_apic_write_remap_rte(unsigned in struct msi_desc; struct msi_msg; void msi_msg_read_remap_rte(struct msi_desc *, struct msi_msg *); -void msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *); +int msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *); int intel_setup_hpet_msi(struct msi_desc *); --- a/xen/drivers/passthrough/vtd/intremap.c +++ b/xen/drivers/passthrough/vtd/intremap.c @@ -653,7 +653,7 @@ void msi_msg_read_remap_rte( remap_entry_to_msi_msg(drhd->iommu, msg); } -void msi_msg_write_remap_rte( +int msi_msg_write_remap_rte( struct msi_desc *msi_desc, struct msi_msg *msg) { struct pci_dev *pdev = msi_desc->dev; @@ -661,8 +661,8 @@ void msi_msg_write_remap_rte( drhd = pdev ? acpi_find_matched_drhd_unit(pdev) : hpet_to_drhd(msi_desc->hpet_id); - if ( drhd ) - msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg); + return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg) + : -EINVAL; } int __init intel_setup_hpet_msi(struct msi_desc *msi_desc) --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -93,7 +93,7 @@ void *amd_iommu_alloc_intremap_table(voi int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *); void amd_iommu_ioapic_update_ire( unsigned int apic, unsigned int reg, unsigned int value); -void amd_iommu_msi_msg_update_ire( +int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg); void amd_iommu_read_msi_from_ire( struct msi_desc *msi_desc, struct msi_msg *msg); --- a/xen/include/asm-x86/msi.h +++ b/xen/include/asm-x86/msi.h @@ -78,8 +78,7 @@ extern int pci_enable_msi(struct msi_inf extern void pci_disable_msi(struct msi_desc *desc); extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off); extern void pci_cleanup_msi(struct pci_dev *pdev); -extern void setup_msi_handler(struct irq_desc *, struct msi_desc *); -extern void setup_msi_irq(struct irq_desc *); +extern int setup_msi_irq(struct irq_desc *, struct msi_desc *); extern void teardown_msi_irq(int irq); extern int msi_free_vector(struct msi_desc *entry); extern int pci_restore_msi_state(struct pci_dev *pdev); --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -106,7 +106,7 @@ struct iommu_ops { u8 devfn, struct pci_dev *); int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value); - void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); + int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg); unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg); int (*setup_hpet_msi)(struct msi_desc *); @@ -120,7 +120,7 @@ struct iommu_ops { }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); -void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); +int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg); void 
iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg); unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg); int iommu_setup_hpet_msi(struct msi_desc *);
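The v3 note above turns on the new _find_iommu_for_device() helper distinguishing three outcomes: a usable IOMMU, "the MSI is being set up for an IOMMU itself" (remapping is simply skipped), and "no IOMMU known" (a real error). Below is a minimal standalone sketch of that return convention, not the Xen code itself: the lookup data, the lookup() helper and the BDF values are made up for illustration, and the ERR_PTR/PTR_ERR/IS_ERR_OR_NULL macros are simplified local stand-ins for what xen/err.h provides.

/* Simplified stand-ins for the xen/err.h helpers the patch uses. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095
#define ERR_PTR(err)        ((void *)(intptr_t)(err))
#define PTR_ERR(ptr)        ((long)(intptr_t)(ptr))
#define IS_ERR_OR_NULL(ptr) (!(ptr) || (uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

struct amd_iommu { int seg, bdf; };

/* Pretend the system has two IOMMUs at these (made up) BDFs. */
static struct amd_iommu iommus[] = { { 0, 0x40 }, { 0, 0x50 } };

/* Hypothetical lookup standing in for _find_iommu_for_device(). */
static struct amd_iommu *lookup(int seg, int bdf)
{
    for (unsigned int i = 0; i < sizeof(iommus) / sizeof(iommus[0]); i++)
        if (iommus[i].seg == seg && iommus[i].bdf == bdf)
            return NULL;          /* MSI is for an IOMMU itself: skip remapping */
    if (bdf == 0x08)              /* pretend only this device has an IOMMU */
        return &iommus[0];
    return ERR_PTR(-EINVAL);      /* no IOMMU known for this device */
}

/* Mirrors the error handling the patch adds to amd_iommu_msi_msg_update_ire(). */
static int msg_update(int seg, int bdf)
{
    struct amd_iommu *iommu = lookup(seg, bdf);

    if (IS_ERR_OR_NULL(iommu))
        return PTR_ERR(iommu);    /* 0 for "skip", -EINVAL for "no IOMMU" */
    /* ... would program the IRTE through *iommu here ... */
    return 0;
}

int main(void)
{
    printf("%d %d %d\n",          /* prints: 0 0 -22 */
           msg_update(0, 0x08), msg_update(0, 0x40), msg_update(0, 0x77));
    return 0;
}

The useful property is that PTR_ERR(NULL) evaluates to 0, so the caller's single "return PTR_ERR(iommu)" yields success for the skip case and -EINVAL for the genuine lookup failure.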
Jan Beulich
2013-Apr-12 10:23 UTC
[PATCH v3 2/3] AMD IOMMU: allocate IRTE entries instead of using a static mapping
For multi-vector MSI, where we surely don''t want to allocate contiguous vectors and be able to set affinities of the individual vectors separately, we need to drop the use of the tuple of vector and delivery mode to determine the IRTE to use, and instead allocate IRTEs (which imo should have been done from the beginning). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v3: Adjust _find_iommu_for_device() to check for the IOMMU itself being the subject _before_ looking up the matching IOMMU (since now that we alter the MSI message, we need to specifically care about skipping the modification when no remapping is to occur), and use the function also in the MSI message read path. This assumes that regardless of whether there is an IVRS mapping for the corresponding PCI device, no remapping occurs for such MSIs. If that isn''t correct, the "return NULL" in the function would need to be changed to "return iommu". --- One thing I surely need confirmation on is whether this BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table ! get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); in update_intremap_entry_from_msi_msg() is valid. If it isn''t, it''s not clear to me how to properly set up things for affected devices, as we would need an identical index allocated for two different remap table instances (which can hardly be expected to work out well). --- a/xen/drivers/passthrough/amd/iommu_acpi.c +++ b/xen/drivers/passthrough/amd/iommu_acpi.c @@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr /* allocate per-device interrupt remapping table */ if ( amd_iommu_perdev_intremap ) ivrs_mappings[alias_id].intremap_table - amd_iommu_alloc_intremap_table(); + amd_iommu_alloc_intremap_table( + &ivrs_mappings[alias_id].intremap_inuse); else { if ( shared_intremap_table == NULL ) - shared_intremap_table = amd_iommu_alloc_intremap_table(); + shared_intremap_table = amd_iommu_alloc_intremap_table( + &shared_intremap_inuse); ivrs_mappings[alias_id].intremap_table = shared_intremap_table; + ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse; } } /* assgin iommu hardware */ @@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec if ( IO_APIC_ID(apic) != special->handle ) continue; - if ( ioapic_sbdf[special->handle].pin_setup ) + if ( ioapic_sbdf[special->handle].pin_2_idx ) { if ( ioapic_sbdf[special->handle].bdf == bdf && ioapic_sbdf[special->handle].seg == seg ) @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec ioapic_sbdf[special->handle].bdf = bdf; ioapic_sbdf[special->handle].seg = seg; - ioapic_sbdf[special->handle].pin_setup = xzalloc_array( - unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); + ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array( + u16, nr_ioapic_entries[apic]); if ( nr_ioapic_entries[apic] && - !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); return 0; } + memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, + nr_ioapic_entries[apic]); } break; } @@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic ) { if ( !nr_ioapic_entries[apic] || - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) continue; printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", @@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc error = -ENXIO; else { - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array( - unsigned long, 
BITS_TO_LONGS(nr_ioapic_entries[apic])); - if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array( + u16, nr_ioapic_entries[apic]); + if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); error = -ENOMEM; } + memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, + nr_ioapic_entries[apic]); } } --- a/xen/drivers/passthrough/amd/iommu_intr.c +++ b/xen/drivers/passthrough/amd/iommu_intr.c @@ -31,6 +31,7 @@ struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS]; struct hpet_sbdf hpet_sbdf; void *shared_intremap_table; +unsigned long *shared_intremap_inuse; static DEFINE_SPINLOCK(shared_intremap_lock); static spinlock_t* get_intremap_lock(int seg, int req_id) @@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int return get_ivrs_mappings(seg)[bdf].dte_requestor_id; } -static int get_intremap_offset(u8 vector, u8 dm) +static unsigned int alloc_intremap_entry(int seg, int bdf) { - int offset = 0; - offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK; - offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & - INT_REMAP_INDEX_VECTOR_MASK; - return offset; + unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse; + unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES); + + if ( slot < INTREMAP_ENTRIES ) + __set_bit(slot, inuse); + return slot; } -static u8 *get_intremap_entry(int seg, int bdf, int offset) +static u32 *get_intremap_entry(int seg, int bdf, int offset) { - u8 *table; + u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table; - table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table; ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) ); - return (u8*) (table + offset); + return table + offset; } static void free_intremap_entry(int seg, int bdf, int offset) { - u32* entry; - entry = (u32*)get_intremap_entry(seg, bdf, offset); + u32 *entry = get_intremap_entry(seg, bdf, offset); + memset(entry, 0, sizeof(u32)); + __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse); } static void update_intremap_entry(u32* entry, u8 vector, u8 int_type, @@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e INT_REMAP_ENTRY_VECTOR_SHIFT, entry); } -static void update_intremap_entry_from_ioapic( +static void set_rte_index(struct IO_APIC_route_entry *rte, int offset) +{ + rte->vector = (u8)offset; + rte->delivery_mode = offset >> 8; +} + +static int update_intremap_entry_from_ioapic( int bdf, struct amd_iommu *iommu, - const struct IO_APIC_route_entry *rte, - const struct IO_APIC_route_entry *old_rte) + struct IO_APIC_route_entry *rte, + u16 *index) { unsigned long flags; u32* entry; u8 delivery_mode, dest, vector, dest_mode; int req_id; spinlock_t *lock; - int offset; + unsigned int offset; req_id = get_intremap_requestor_id(iommu->seg, bdf); lock = get_intremap_lock(iommu->seg, req_id); @@ -121,16 +129,20 @@ static void update_intremap_entry_from_i spin_lock_irqsave(lock, flags); - offset = get_intremap_offset(vector, delivery_mode); - if ( old_rte ) + offset = *index; + if ( offset >= INTREMAP_ENTRIES ) { - int old_offset = get_intremap_offset(old_rte->vector, - old_rte->delivery_mode); - - if ( offset != old_offset ) - free_intremap_entry(iommu->seg, bdf, old_offset); + offset = alloc_intremap_entry(iommu->seg, req_id); + if ( offset >= INTREMAP_ENTRIES ) + { + spin_unlock_irqrestore(lock, flags); + rte->mask = 1; + return -ENOSPC; + } + *index = offset; } - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + + entry = 
get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); @@ -141,6 +153,10 @@ static void update_intremap_entry_from_i amd_iommu_flush_intremap(iommu, req_id); spin_unlock_irqrestore(&iommu->lock, flags); } + + set_rte_index(rte, offset); + + return 0; } int __init amd_iommu_setup_ioapic_remapping(void) @@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp u16 seg, bdf, req_id; struct amd_iommu *iommu; spinlock_t *lock; - int offset; + unsigned int offset; /* Read ioapic entries and update interrupt remapping table accordingly */ for ( apic = 0; apic < nr_ioapics; apic++ ) @@ -184,19 +200,24 @@ int __init amd_iommu_setup_ioapic_remapp dest = rte.dest.logical.logical_dest; spin_lock_irqsave(lock, flags); - offset = get_intremap_offset(vector, delivery_mode); - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + offset = alloc_intremap_entry(seg, req_id); + BUG_ON(offset >= INTREMAP_ENTRIES); + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; + entry = get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + set_rte_index(&rte, offset); + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; + __ioapic_write_entry(apic, pin, 1, rte); + if ( iommu->enabled ) { spin_lock_irqsave(&iommu->lock, flags); amd_iommu_flush_intremap(iommu, req_id); spin_unlock_irqrestore(&iommu->lock, flags); } - set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup); } } return 0; @@ -209,7 +230,7 @@ void amd_iommu_ioapic_update_ire( struct IO_APIC_route_entry new_rte = { 0 }; unsigned int rte_lo = (reg & 1) ? reg - 1 : reg; unsigned int pin = (reg - 0x10) / 2; - int saved_mask, seg, bdf; + int saved_mask, seg, bdf, rc; struct amd_iommu *iommu; if ( !iommu_intremap ) @@ -247,7 +268,7 @@ void amd_iommu_ioapic_update_ire( } if ( new_rte.mask && - !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES ) { ASSERT(saved_mask); __io_apic_write(apic, reg, value); @@ -262,14 +283,19 @@ void amd_iommu_ioapic_update_ire( } /* Update interrupt remapping entry */ - update_intremap_entry_from_ioapic( - bdf, iommu, &new_rte, - test_and_set_bit(pin, - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte - : NULL); + rc = update_intremap_entry_from_ioapic( + bdf, iommu, &new_rte, + &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]); - /* Forward write access to IO-APIC RTE */ - __io_apic_write(apic, reg, value); + __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]); + + if ( rc ) + { + /* Keep the entry masked. 
*/ + printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n", + IO_APIC_ID(apic), pin, rc); + return; + } /* For lower bits access, return directly to avoid double writes */ if ( reg == rte_lo ) @@ -283,16 +309,41 @@ void amd_iommu_ioapic_update_ire( } } -static void update_intremap_entry_from_msi_msg( +unsigned int amd_iommu_read_ioapic_from_ire( + unsigned int apic, unsigned int reg) +{ + unsigned int val = __io_apic_read(apic, reg); + + if ( !(reg & 1) ) + { + unsigned int offset = val & (INTREMAP_ENTRIES - 1); + u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf; + u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg; + u16 req_id = get_intremap_requestor_id(seg, bdf); + const u32 *entry = get_intremap_entry(seg, req_id, offset); + + val &= ~(INTREMAP_ENTRIES - 1); + val |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_INTTYPE_MASK, + INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; + val |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_VECTOR_MASK, + INT_REMAP_ENTRY_VECTOR_SHIFT); + } + + return val; +} + +static int update_intremap_entry_from_msi_msg( struct amd_iommu *iommu, u16 bdf, - int *remap_index, const struct msi_msg *msg) + int *remap_index, const struct msi_msg *msg, u32 *data) { unsigned long flags; u32* entry; u16 req_id, alias_id; u8 delivery_mode, dest, vector, dest_mode; spinlock_t *lock; - int offset; + unsigned int offset; req_id = get_dma_requestor_id(iommu->seg, bdf); alias_id = get_intremap_requestor_id(iommu->seg, bdf); @@ -303,15 +354,6 @@ static void update_intremap_entry_from_m spin_lock_irqsave(lock, flags); free_intremap_entry(iommu->seg, req_id, *remap_index); spin_unlock_irqrestore(lock, flags); - - if ( ( req_id != alias_id ) && - get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) - { - lock = get_intremap_lock(iommu->seg, alias_id); - spin_lock_irqsave(lock, flags); - free_intremap_entry(iommu->seg, alias_id, *remap_index); - spin_unlock_irqrestore(lock, flags); - } goto done; } @@ -322,16 +364,24 @@ static void update_intremap_entry_from_m delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1; vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK; dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff; - offset = get_intremap_offset(vector, delivery_mode); - if ( *remap_index < 0) + offset = *remap_index; + if ( offset >= INTREMAP_ENTRIES ) + { + offset = alloc_intremap_entry(iommu->seg, bdf); + if ( offset >= INTREMAP_ENTRIES ) + { + spin_unlock_irqrestore(lock, flags); + return -ENOSPC; + } *remap_index = offset; - else - BUG_ON(*remap_index != offset); + } - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + entry = get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset; + /* * In some special cases, a pci-e device(e.g SATA controller in IDE mode) * will use alias id to index interrupt remapping table. 
@@ -343,10 +393,8 @@ static void update_intremap_entry_from_m if ( ( req_id != alias_id ) && get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) { - spin_lock_irqsave(lock, flags); - entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset); - update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); - spin_unlock_irqrestore(lock, flags); + BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); } done: @@ -358,19 +406,22 @@ done: amd_iommu_flush_intremap(iommu, alias_id); spin_unlock_irqrestore(&iommu->lock, flags); } + + return 0; } static struct amd_iommu *_find_iommu_for_device(int seg, int bdf) { - struct amd_iommu *iommu = find_iommu_for_device(seg, bdf); - - if ( iommu ) - return iommu; + struct amd_iommu *iommu; list_for_each_entry ( iommu, &amd_iommu_head, list ) if ( iommu->seg == seg && iommu->bdf == bdf ) return NULL; + iommu = find_iommu_for_device(seg, bdf); + if ( iommu ) + return iommu; + AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n", seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf)); return ERR_PTR(-EINVAL); @@ -380,8 +431,9 @@ int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { struct pci_dev *pdev = msi_desc->dev; - int bdf, seg; + int bdf, seg, rc; struct amd_iommu *iommu; + u32 data; bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; seg = pdev ? pdev->seg : hpet_sbdf.seg; @@ -390,11 +442,12 @@ int amd_iommu_msi_msg_update_ire( if ( IS_ERR_OR_NULL(iommu) ) return PTR_ERR(iommu); - if ( msi_desc->remap_index >= 0 ) + if ( msi_desc->remap_index >= 0 && !msg ) { do { update_intremap_entry_from_msi_msg(iommu, bdf, - &msi_desc->remap_index, NULL); + &msi_desc->remap_index, + NULL, NULL); if ( !pdev || !pdev->phantom_stride ) break; bdf += pdev->phantom_stride; @@ -409,19 +462,39 @@ int amd_iommu_msi_msg_update_ire( return 0; do { - update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index, - msg); - if ( !pdev || !pdev->phantom_stride ) + rc = update_intremap_entry_from_msi_msg(iommu, bdf, + &msi_desc->remap_index, + msg, &data); + if ( rc || !pdev || !pdev->phantom_stride ) break; bdf += pdev->phantom_stride; } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); - return 0; + msg->data = data; + return rc; } void amd_iommu_read_msi_from_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { + unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1); + const struct pci_dev *pdev = msi_desc->dev; + u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; + u16 seg = pdev ? 
pdev->seg : hpet_sbdf.seg; + const u32 *entry; + + if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) ) + return; + + entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset); + + msg->data &= ~(INTREMAP_ENTRIES - 1); + msg->data |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_INTTYPE_MASK, + INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; + msg->data |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_VECTOR_MASK, + INT_REMAP_ENTRY_VECTOR_SHIFT); } int __init amd_iommu_free_intremap_table( @@ -438,12 +511,14 @@ int __init amd_iommu_free_intremap_table return 0; } -void* __init amd_iommu_alloc_intremap_table(void) +void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map) { void *tb; tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); BUG_ON(tb == NULL); memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); + *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES)); + BUG_ON(*inuse_map == NULL); return tb; } --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = { .get_device_group_id = amd_iommu_group_id, .update_ire_from_apic = amd_iommu_ioapic_update_ire, .update_ire_from_msi = amd_iommu_msi_msg_update_ire, - .read_apic_from_ire = __io_apic_read, + .read_apic_from_ire = amd_iommu_read_ioapic_from_ire, .read_msi_from_ire = amd_iommu_read_msi_from_ire, .setup_hpet_msi = amd_setup_hpet_msi, .suspend = amd_iommu_suspend, --- a/xen/include/asm-x86/amd-iommu.h +++ b/xen/include/asm-x86/amd-iommu.h @@ -119,6 +119,7 @@ struct ivrs_mappings { /* per device interrupt remapping table */ void *intremap_table; + unsigned long *intremap_inuse; spinlock_t intremap_lock; /* ivhd device data settings */ --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -458,10 +458,6 @@ #define MAX_AMD_IOMMUS 32 /* interrupt remapping table */ -#define INT_REMAP_INDEX_DM_MASK 0x1C00 -#define INT_REMAP_INDEX_DM_SHIFT 10 -#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC -#define INT_REMAP_INDEX_VECTOR_SHIFT 2 #define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001 #define INT_REMAP_ENTRY_REMAPEN_SHIFT 0 #define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device( /* interrupt remapping */ int amd_iommu_setup_ioapic_remapping(void); -void *amd_iommu_alloc_intremap_table(void); +void *amd_iommu_alloc_intremap_table(unsigned long **); int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *); void amd_iommu_ioapic_update_ire( unsigned int apic, unsigned int reg, unsigned int value); +unsigned int amd_iommu_read_ioapic_from_ire( + unsigned int apic, unsigned int reg); int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg); void amd_iommu_read_msi_from_ire( @@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc * extern struct ioapic_sbdf { u16 bdf, seg; - unsigned long *pin_setup; + u16 *pin_2_idx; } ioapic_sbdf[MAX_IO_APICS]; -extern void *shared_intremap_table; extern struct hpet_sbdf { u16 bdf, seg, id; struct amd_iommu *iommu; } hpet_sbdf; +extern void *shared_intremap_table; +extern unsigned long *shared_intremap_inuse; + /* power management support */ void amd_iommu_resume(void); void amd_iommu_suspend(void); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
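The core change in patch 2 above is that the IRTE to use is no longer derived from the vector/delivery-mode tuple; alloc_intremap_entry() instead hands out slots from a per-table "inuse" bitmap, and free_intremap_entry() returns them. A minimal standalone sketch of that allocation scheme follows, not the Xen code: INTREMAP_ENTRIES matches the patch, but locking, the per-device table lookup and find_first_zero_bit() are replaced by a plain loop over a single bitmap.

#include <stdio.h>

#define INTREMAP_ENTRIES 2048
#define BITS_PER_LONG    (8 * sizeof(unsigned long))

/* One "inuse" bitmap per remap table; a single table is enough here. */
static unsigned long inuse[INTREMAP_ENTRIES / BITS_PER_LONG];

/* Hand out the first free slot, or INTREMAP_ENTRIES if the table is full. */
static unsigned int alloc_intremap_entry(void)
{
    for (unsigned int slot = 0; slot < INTREMAP_ENTRIES; slot++)
        if (!(inuse[slot / BITS_PER_LONG] & (1UL << (slot % BITS_PER_LONG)))) {
            inuse[slot / BITS_PER_LONG] |= 1UL << (slot % BITS_PER_LONG);
            return slot;
        }
    return INTREMAP_ENTRIES;   /* caller turns this into -ENOSPC */
}

static void free_intremap_entry(unsigned int slot)
{
    inuse[slot / BITS_PER_LONG] &= ~(1UL << (slot % BITS_PER_LONG));
}

int main(void)
{
    unsigned int a = alloc_intremap_entry();
    unsigned int b = alloc_intremap_entry();
    free_intremap_entry(a);
    unsigned int c = alloc_intremap_entry();  /* reuses the freed slot */
    printf("a=%u b=%u c=%u\n", a, b, c);      /* a=0 b=1 c=0 */
    return 0;
}

The allocated index is what then gets encoded into the low bits of the MSI data word (or into the RTE vector/delivery-mode fields on the IO-APIC path), which is why the corresponding read paths in the patch now have to translate the index back into the original fields.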
With the specific IRTEs used for an interrupt no longer depending on the
vector, there's no need to tie the remap sharing model to the vector
sharing one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,35 +207,6 @@ int __init amd_iov_detect(void)
 
     init_done = 1;
 
-    /*
-     * AMD IOMMUs don't distinguish between vectors destined for
-     * different cpus when doing interrupt remapping. This means
-     * that interrupts going through the same intremap table
-     * can't share the same vector.
-     *
-     * If irq_vector_map isn't specified, choose a sensible default:
-     *  - If we're using per-device interemap tables, per-device
-     *    vector non-sharing maps
-     *  - If we're using a global interemap table, global vector
-     *    non-sharing map
-     */
-    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
-    {
-        if ( amd_iommu_perdev_intremap )
-        {
-            printk("AMD-Vi: Enabling per-device vector maps\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
-        }
-        else
-        {
-            printk("AMD-Vi: Enabling global vector map\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
-        }
-    }
-    else
-    {
-        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-    }
     if ( !amd_iommu_perdev_intremap )
         printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
     return scan_pci_devices();
Suravee Suthikulpanit
2013-Apr-13 01:16 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
On 4/12/2013 5:18 AM, Jan Beulich wrote:
> 1: IOMMU: allow MSI message to IRTE propagation to fail
> 2: AMD IOMMU: allocate IRTE entries instead of using a static mapping
> 3: AMD IOMMU: untie remap and vector maps
>
> See the individual patches for what, if anything, has changed from v2.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Jan,

This patch set fixes the previous issue we saw from get_intremap_entry() causing the ASSERT on (table != NULL). Now the system is booting.

However, I think there are some issues with the interrupt remapping for the USB devices. During dom0 boot, it gave errors about timeouts while loading some USB drivers. Also, lsmod shows the "hid_generic" module is missing. This results in the USB mouse and keyboard not working. From xl dmesg, I do not see anything different from before patching.

I am investigating the issue.

Suravee
Zhang, Xiantao
2013-Apr-15 06:38 UTC
Re: [PATCH v3 1/3] IOMMU: allow MSI message to IRTE propagation to fail
Acked, Thanks!
Xiantao

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Friday, April 12, 2013 6:23 PM
> To: xen-devel; Jan Beulich
> Cc: Jacob Shin; suravee.suthikulpanit@amd.com; Zhang, Xiantao
> Subject: [PATCH v3 1/3] IOMMU: allow MSI message to IRTE propagation to fail
>
> With the need to allocate multiple contiguous IRTEs for multi-vector
> MSI, the chance of failure here increases. While on the AMD side
> there's no allocation of IRTEs at present at all (and hence no way for
> this allocation to fail, which is going to change with a later patch in
> this series), VT-d already ignores an eventual error here, which this
> patch fixes.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> [...]
Jan Beulich
2013-Apr-15 14:43 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
>>> On 13.04.13 at 03:16, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> On 4/12/2013 5:18 AM, Jan Beulich wrote:
>> 1: IOMMU: allow MSI message to IRTE propagation to fail
>> 2: AMD IOMMU: allocate IRTE entries instead of using a static mapping
>> 3: AMD IOMMU: untie remap and vector maps
>>
>> See the individual patches for what, if anything, has changed from v2.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> This patch set fixes the previous issue we saw from get_intremap_entry()
> causing the ASSERT on (table != NULL). Now the system is booting.
>
> However, I think there are some issues with the interrupt remapping for
> the USB devices. During dom0 boot, it gave errors about timeouts while
> loading some USB drivers. Also, lsmod shows the "hid_generic" module is
> missing. This results in the USB mouse and keyboard not working.

So I would guess something must be wrong with the IO-APIC related code paths then. I just went through them again, but the only thing I spotted was a pointless duplicate assignment to ioapic_sbdf[].pin_2_idx[] in amd_iommu_setup_ioapic_remapping(). Sadly we don't have the 'V' debug key for AMD yet, otherwise the combination of 'V' and 'z' output would likely tell us quite clearly what's wrong. Looking at 'z' might still be worthwhile though...

Jan
Suravee Suthikulanit
2013-Apr-15 16:14 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
On 4/15/2013 9:43 AM, Jan Beulich wrote:
>>>> On 13.04.13 at 03:16, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> On 4/12/2013 5:18 AM, Jan Beulich wrote:
>>> 1: IOMMU: allow MSI message to IRTE propagation to fail
>>> 2: AMD IOMMU: allocate IRTE entries instead of using a static mapping
>>> 3: AMD IOMMU: untie remap and vector maps
>>>
>>> See the individual patches for what, if anything, has changed from v2.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> This patch set fixes the previous issue we saw from get_intremap_entry()
>> causing the ASSERT on (table != NULL). Now the system is booting.
>>
>> However, I think there are some issues with the interrupt remapping for
>> the USB devices. During dom0 boot, it gave errors about timeouts while
>> loading some USB drivers. Also, lsmod shows the "hid_generic" module is
>> missing. This results in the USB mouse and keyboard not working.
> So I would guess something must be wrong with the IO-APIC
> related code paths then.

Why is this IO-APIC and not MSI or MSI-X?

> I just went through them again, but the
> only thing I spotted was a pointless duplicate assignment to
> ioapic_sbdf[].pin_2_idx[] in amd_iommu_setup_ioapic_remapping().
> Sadly we don't have the 'V' debug key for AMD yet, otherwise the
> combination of 'V' and 'z' output would likely tell us quite clearly
> what's wrong. Looking at 'z' might still be worthwhile though...

On another topic, in arch/x86/msi.c, in the function set_msi_affinity(), the code does:
1. "read_msi_msg"
2. Modify the affinity mask
3. "write_msi_msg" back the register value.

In read, if interrupt remapping is enabled, from the patch, the function returns the MSI data with remapped information from the IOMMU. Then in write, if interrupt remapping is enabled, the function will update the IOMMU interrupt remapping entries with the already "remapped" vector. In this case, you would be updating the incorrect IOMMU IRTE.

Suravee
Jan Beulich
2013-Apr-16 06:43 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
>>> On 15.04.13 at 18:14, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> On 4/15/2013 9:43 AM, Jan Beulich wrote:
>>>>> On 13.04.13 at 03:16, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>>> On 4/12/2013 5:18 AM, Jan Beulich wrote:
>>>> 1: IOMMU: allow MSI message to IRTE propagation to fail
>>>> 2: AMD IOMMU: allocate IRTE entries instead of using a static mapping
>>>> 3: AMD IOMMU: untie remap and vector maps
>>>>
>>>> See the individual patches for what, if anything, has changed from v2.
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> This patch set fixes the previous issue we saw from get_intremap_entry()
>>> causing the ASSERT on (table != NULL). Now the system is booting.
>>>
>>> However, I think there are some issues with the interrupt remapping for
>>> the USB devices. During dom0 boot, it gave errors about timeouts while
>>> loading some USB drivers. Also, lsmod shows the "hid_generic" module is
>>> missing. This results in the USB mouse and keyboard not working.
>> So I would guess something must be wrong with the IO-APIC
>> related code paths then.
> Why is this IO-APIC and not MSI or MSI-X?

I'm just guessing that your USB controllers, other than the disk and network ones (which apparently work), use pin based interrupts rather than MSI.

>> I just went through them again, but the
>> only thing I spotted was a pointless duplicate assignment to
>> ioapic_sbdf[].pin_2_idx[] in amd_iommu_setup_ioapic_remapping().
>> Sadly we don't have the 'V' debug key for AMD yet, otherwise the
>> combination of 'V' and 'z' output would likely tell us quite clearly
>> what's wrong. Looking at 'z' might still be worthwhile though...
>
> On another topic, in arch/x86/msi.c, in the function set_msi_affinity(),
> the code does:
> 1. "read_msi_msg"
> 2. Modify the affinity mask
> 3. "write_msi_msg" back the register value.
>
> In read, if interrupt remapping is enabled, from the patch, the function
> returns the MSI data with remapped information from the IOMMU. Then in
> write, if interrupt remapping is enabled, the function will update the
> IOMMU interrupt remapping entries with the already "remapped" vector.
> In this case, you would be updating the incorrect IOMMU IRTE.

Where did you spot that? To prevent this from happening is exactly why amd_iommu_read_msi_from_ire() isn't empty anymore (this is where the original MSI message information gets reconstructed - or at least is intended to be). The only modification done by update_intremap_entry_from_msi_msg() is to the low 11 data bits, and that's what gets overwritten upon read.

Jan
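A minimal standalone sketch of the round trip described above, not the Xen code: the IRTE is reduced to the two fields needed for the illustration, the per-device table lookup and phantom-function handling are omitted, and the example index and data values are made up. The write path keeps the allocated index in the low 11 data bits (INTREMAP_ENTRIES being 2048), and the read path reconstructs the original vector and delivery mode from the table, so a read/modify/write cycle such as set_msi_affinity() never feeds a remapped value back into an IRTE.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define INTREMAP_ENTRIES 2048

struct irte { uint8_t vector; uint8_t delivery_mode; };

static struct irte table[INTREMAP_ENTRIES];

/* Write path: program the IRTE, hand the index back in the MSI data word. */
static uint32_t update_ire_from_msi(uint32_t msg_data, unsigned int index)
{
    table[index].vector        = msg_data & 0xff;        /* MSI data [7:0]  */
    table[index].delivery_mode = (msg_data >> 8) & 0x7;  /* MSI data [10:8] */
    return (msg_data & ~(uint32_t)(INTREMAP_ENTRIES - 1)) | index;
}

/* Read path: recover the original low bits from the IRTE. */
static uint32_t read_msi_from_ire(uint32_t stored_data)
{
    unsigned int index = stored_data & (INTREMAP_ENTRIES - 1);
    return (stored_data & ~(uint32_t)(INTREMAP_ENTRIES - 1)) |
           (table[index].delivery_mode << 8) | table[index].vector;
}

int main(void)
{
    /* Arbitrary upper bits preserved, fixed delivery mode, vector 0x35. */
    uint32_t original = 0x4000 | (0 << 8) | 0x35;
    uint32_t stored = update_ire_from_msi(original, 7);   /* made-up index */

    assert(read_msi_from_ire(stored) == original);
    printf("stored=%#x reconstructed=%#x\n",
           (unsigned)stored, (unsigned)read_msi_from_ire(stored));
    return 0;
}

In the example, stored differs from original only in the low 11 bits (they hold the IRTE index 7), and the read path recovers the original value, matching the point that only those bits are modified on write and overwritten again on read.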
Suravee Suthikulanit
2013-Apr-18 17:25 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
On 4/16/2013 1:43 AM, Jan Beulich wrote:
>> On another topic, in arch/x86/msi.c, in the function set_msi_affinity(),
>> the code does:
>> 1. "read_msi_msg"
>> 2. Modify the affinity mask
>> 3. "write_msi_msg" back the register value.
>>
>> In read, if interrupt remapping is enabled, from the patch, the function
>> returns the MSI data with remapped information from the IOMMU. Then in
>> write, if interrupt remapping is enabled, the function will update the
>> IOMMU interrupt remapping entries with the already "remapped" vector.
>> In this case, you would be updating the incorrect IOMMU IRTE.
> Where did you spot that?

This is in xen/arch/x86/msi.c.

> To prevent this from happening is exactly
> why amd_iommu_read_msi_from_ire() isn't empty anymore (this is
> where the original MSI message information gets reconstructed - or
> at least is intended to be). The only modification done by
> update_intremap_entry_from_msi_msg() is to the low 11 data bits,
> and that's what gets overwritten upon read.

Sorry, I am not quite following this. Why do we need to reconstruct the MSI message? Why was it not required in the past?

Suravee.
Jan Beulich
2013-Apr-19 07:29 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
>>> On 18.04.13 at 19:25, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> On 4/16/2013 1:43 AM, Jan Beulich wrote:
>>> On another topic, in arch/x86/msi.c, in the function set_msi_affinity(),
>>> the code does:
>>> 1. "read_msi_msg"
>>> 2. Modify the affinity mask
>>> 3. "write_msi_msg" back the register value.
>>>
>>> In read, if interrupt remapping is enabled, from the patch, the function
>>> returns the MSI data with remapped information from the IOMMU. Then in
>>> write, if interrupt remapping is enabled, the function will update the
>>> IOMMU interrupt remapping entries with the already "remapped" vector.
>>> In this case, you would be updating the incorrect IOMMU IRTE.
>> Where did you spot that?
> This is in xen/arch/x86/msi.c.

That's not precise enough, the more that the same model has been working for VT-d for a long time.

Are you perhaps getting confused by the slightly odd way things get stored/passed: write_msi_msg() specifically asserts that "msg" doesn't point to the stored version (entry->msg), i.e. the modification done to *msg by iommu_update_ire_from_msi() won't be used as input on a subsequent invocation.

>> To prevent this from happening is exactly
>> why amd_iommu_read_msi_from_ire() isn't empty anymore (this is
>> where the original MSI message information gets reconstructed - or
>> at least is intended to be). The only modification done by
>> update_intremap_entry_from_msi_msg() is to the low 11 data bits,
>> and that's what gets overwritten upon read.
> Sorry, I am not quite following this. Why do we need to reconstruct the MSI
> message? Why was it not required in the past?

Previously the write path didn't modify the message, and hence the read path didn't need to reconstruct the original. With the switch to allocating IRTEs (rather than calculating the used one from vector and delivery mode), the model now matches VT-d's, and hence the behavior also needs to be adjusted accordingly.

Anyway - did you make any progress towards identifying the problems with the USB controller? I didn't see any more complete output, so I have nothing to work with to find where the problem is. In any case I'm going to post the full multi-vector MSI series later today, as it's working fine for me on VT-d.

Jan
Suravee Suthikulanit
2013-Apr-22 23:58 UTC
Re: [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites
On 4/19/2013 2:29 AM, Jan Beulich wrote:
>>>> On 18.04.13 at 19:25, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
>> On 4/16/2013 1:43 AM, Jan Beulich wrote:
>>>> On another topic, in arch/x86/msi.c, in the function set_msi_affinity(),
>>>> the code does:
>>>> 1. "read_msi_msg"
>>>> 2. Modify the affinity mask
>>>> 3. "write_msi_msg" back the register value.
>>>>
>>>> In read, if interrupt remapping is enabled, from the patch, the function
>>>> returns the MSI data with remapped information from the IOMMU. Then in
>>>> write, if interrupt remapping is enabled, the function will update the
>>>> IOMMU interrupt remapping entries with the already "remapped" vector.
>>>> In this case, you would be updating the incorrect IOMMU IRTE.
>>> Where did you spot that?
>> This is in xen/arch/x86/msi.c.
> That's not precise enough, the more that the same model has
> been working for VT-d for a long time.
>
> Are you perhaps getting confused by the slightly odd way
> things get stored/passed: write_msi_msg() specifically asserts
> that "msg" doesn't point to the stored version (entry->msg), i.e.
> the modification done to *msg by iommu_update_ire_from_msi()
> won't be used as input on a subsequent invocation.
>
>>> To prevent this from happening is exactly
>>> why amd_iommu_read_msi_from_ire() isn't empty anymore (this is
>>> where the original MSI message information gets reconstructed - or
>>> at least is intended to be). The only modification done by
>>> update_intremap_entry_from_msi_msg() is to the low 11 data bits,
>>> and that's what gets overwritten upon read.
>> Sorry, I am not quite following this. Why do we need to reconstruct the MSI
>> message? Why was it not required in the past?
> Previously the write path didn't modify the message, and hence
> the read path didn't need to reconstruct the original. With the
> switch to allocating IRTEs (rather than calculating the used one
> from vector and delivery mode), the model now matches VT-d's,
> and hence the behavior also needs to be adjusted accordingly.

Thanks for the clarification. I misunderstood the code that manages the interrupt remapping entries. Sorry for the confusion.

> Anyway - did you make any progress towards identifying the
> problems with the USB controller? I didn't see any more complete
> output, so I have nothing to work with to find where the
> problem is. In any case I'm going to post the full multi-vector
> MSI series later today, as it's working fine for me on VT-d.

Somehow I could no longer reproduce this case. I'll keep an eye on this for the future.

Suravee