Ian Jackson
2011-Aug-16 11:09 UTC
[Xen-devel] [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
23733:fbf3768e5934 causes xen-unstable not to boot on several of the xen.org AMD test systems. We get an endless series of these: (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0x00a0, fault address = 0xfdf8f10144 I have constructed the attached patch which reverts c/s 23733 (adjusted for conflicts due to subsequent patches). With this reversion Xen once more boots on these machines. 23733 has been in the tree for some time now, causing this breakage, and has already been fingered by the automatic bisector and discussed on xen-devel as the cause of boot failures. I think it is now time to revert it pending a correct fix to the original problem. Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_acpi.c --- a/xen/drivers/passthrough/amd/iommu_acpi.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Tue Aug 16 11:57:01 2011 +0100 @@ -66,8 +66,15 @@ static void __init add_ivrs_mapping_entr if (ivrs_mappings[alias_id].intremap_table == NULL ) { /* allocate per-device interrupt remapping table */ - ivrs_mappings[alias_id].intremap_table + if ( amd_iommu_perdev_intremap ) + ivrs_mappings[alias_id].intremap_table amd_iommu_alloc_intremap_table(); + else + { + if ( shared_intremap_table == NULL ) + shared_intremap_table = amd_iommu_alloc_intremap_table(); + ivrs_mappings[alias_id].intremap_table = shared_intremap_table; + } } /* assgin iommu hardware */ ivrs_mappings[bdf].iommu = iommu; diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 16 11:57:01 2011 +0100 @@ -500,7 +500,7 @@ static void parse_event_log_entry(u32 en /* Tell the device to stop DMAing; we can''t rely on the guest to * control it for us. */ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) - if ( get_requestor_id(bdf) == device_id ) + if ( get_dma_requestor_id(bdf) == device_id ) { cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf), PCI_COMMAND); @@ -772,7 +772,8 @@ static int __init init_ivrs_mapping(void ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED; ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED; - spin_lock_init(&ivrs_mappings[bdf].intremap_lock); + if ( amd_iommu_perdev_intremap ) + spin_lock_init(&ivrs_mappings[bdf].intremap_lock); } return 0; } diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_intr.c --- a/xen/drivers/passthrough/amd/iommu_intr.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_intr.c Tue Aug 16 11:57:01 2011 +0100 @@ -28,10 +28,20 @@ #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH) int ioapic_bdf[MAX_IO_APICS]; +void *shared_intremap_table; +static DEFINE_SPINLOCK(shared_intremap_lock); static spinlock_t* get_intremap_lock(int req_id) { - return &ivrs_mappings[req_id].intremap_lock; + return (amd_iommu_perdev_intremap ? + &ivrs_mappings[req_id].intremap_lock: + &shared_intremap_lock); +} + +static int get_intremap_requestor_id(int bdf) +{ + ASSERT( bdf < ivrs_bdf_entries ); + return ivrs_mappings[bdf].dte_requestor_id; } static int get_intremap_offset(u8 vector, u8 dm) @@ -115,7 +125,7 @@ static void update_intremap_entry_from_i spinlock_t *lock; int offset; - req_id = get_requestor_id(bdf); + req_id = get_intremap_requestor_id(bdf); lock = get_intremap_lock(req_id); delivery_mode = rte->delivery_mode; @@ -173,7 +183,7 @@ int __init amd_iommu_setup_ioapic_remapp continue; } - req_id = get_requestor_id(bdf); + req_id = get_intremap_requestor_id(bdf); lock = get_intremap_lock(req_id); delivery_mode = rte.delivery_mode; @@ -273,13 +283,14 @@ static void update_intremap_entry_from_m { unsigned long flags; u32* entry; - u16 bdf, req_id; + u16 bdf, req_id, alias_id; u8 delivery_mode, dest, vector, dest_mode; spinlock_t *lock; int offset; bdf = (pdev->bus << 8) | pdev->devfn; - req_id = get_requestor_id(bdf); + req_id = get_dma_requestor_id(bdf); + alias_id = get_intremap_requestor_id(bdf); if ( msg == NULL ) { @@ -288,6 +299,14 @@ static void update_intremap_entry_from_m free_intremap_entry(req_id, msi_desc->remap_index); spin_unlock_irqrestore(lock, flags); + if ( ( req_id != alias_id ) && + ivrs_mappings[alias_id].intremap_table != NULL ) + { + lock = get_intremap_lock(alias_id); + spin_lock_irqsave(lock, flags); + free_intremap_entry(alias_id, msi_desc->remap_index); + spin_unlock_irqrestore(lock, flags); + } goto done; } @@ -305,11 +324,30 @@ static void update_intremap_entry_from_m update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + /* + * In some special cases, a pci-e device(e.g SATA controller in IDE mode) + * will use alias id to index interrupt remapping table. + * We have to setup a secondary interrupt remapping entry to satisfy those + * devices. + */ + + lock = get_intremap_lock(alias_id); + if ( ( req_id != alias_id ) && + ivrs_mappings[alias_id].intremap_table != NULL ) + { + spin_lock_irqsave(lock, flags); + entry = (u32*)get_intremap_entry(alias_id, offset); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + } + done: if ( iommu->enabled ) { spin_lock_irqsave(&iommu->lock, flags); invalidate_interrupt_table(iommu, req_id); + if ( alias_id != req_id ) + invalidate_interrupt_table(iommu, alias_id); flush_command_buffer(iommu); spin_unlock_irqrestore(&iommu->lock, flags); } diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Aug 16 11:57:01 2011 +0100 @@ -543,7 +543,7 @@ static int update_paging_mode(struct dom for_each_pdev( d, pdev ) { bdf = (pdev->bus << 8) | pdev->devfn; - req_id = get_requestor_id(bdf); + req_id = get_dma_requestor_id(bdf); iommu = find_iommu_for_device(bdf); if ( !iommu ) { diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 16 11:57:01 2011 +0100 @@ -34,10 +34,25 @@ struct amd_iommu *find_iommu_for_device( return ivrs_mappings[bdf].iommu; } -int get_requestor_id(u16 bdf) +/* + * Some devices will use alias id and original device id to index interrupt + * table and I/O page table respectively. Such devices will have + * both alias entry and select entry in IVRS structure. + * + * Return original device id, if device has valid interrupt remapping + * table setup for both select entry and alias entry. + */ +int get_dma_requestor_id(u16 bdf) { + int req_id; + BUG_ON ( bdf >= ivrs_bdf_entries ); - return ivrs_mappings[bdf].dte_requestor_id; + req_id = ivrs_mappings[bdf].dte_requestor_id; + if ( (ivrs_mappings[bdf].intremap_table != NULL) && + (ivrs_mappings[req_id].intremap_table != NULL) ) + req_id = bdf; + + return req_id; } static int is_translation_valid(u32 *entry) @@ -79,7 +94,7 @@ static void amd_iommu_setup_domain_devic valid = 0; /* get device-table entry */ - req_id = get_requestor_id(bdf); + req_id = get_dma_requestor_id(bdf); dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); spin_lock_irqsave(&iommu->lock, flags); @@ -252,7 +267,7 @@ static void amd_iommu_disable_domain_dev int req_id; BUG_ON ( iommu->dev_table.buffer == NULL ); - req_id = get_requestor_id(bdf); + req_id = get_dma_requestor_id(bdf); dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); spin_lock_irqsave(&iommu->lock, flags); @@ -314,7 +329,7 @@ static int amd_iommu_assign_device(struc static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) { int bdf = (bus << 8) | devfn; - int req_id = get_requestor_id(bdf); + int req_id = get_dma_requestor_id(bdf); if ( ivrs_mappings[req_id].unity_map_enable ) { @@ -433,7 +448,7 @@ static int amd_iommu_group_id(u8 bus, u8 int rt; int bdf = (bus << 8) | devfn; rt = ( bdf < ivrs_bdf_entries ) ? - get_requestor_id(bdf) : + get_dma_requestor_id(bdf) : bdf; return rt; } diff -r 8d6edc3d26d2 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/drivers/passthrough/iommu.c Tue Aug 16 11:57:01 2011 +0100 @@ -50,6 +50,7 @@ bool_t __read_mostly iommu_hap_pt_share; bool_t __read_mostly iommu_hap_pt_share; bool_t __read_mostly iommu_debug; bool_t __read_mostly iommu_amd_perdev_vector_map = 1; +bool_t __read_mostly amd_iommu_perdev_intremap; static void __init parse_iommu_param(char *s) { @@ -76,6 +77,8 @@ static void __init parse_iommu_param(cha iommu_intremap = 0; else if ( !strcmp(s, "debug") ) iommu_debug = 1; + else if ( !strcmp(s, "amd-iommu-perdev-intremap") ) + amd_iommu_perdev_intremap = 1; else if ( !strcmp(s, "dom0-passthrough") ) iommu_passthrough = 1; else if ( !strcmp(s, "dom0-strict") ) diff -r 8d6edc3d26d2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Aug 16 11:57:01 2011 +0100 @@ -65,7 +65,7 @@ void amd_iommu_share_p2m(struct domain * void amd_iommu_share_p2m(struct domain *d); /* device table functions */ -int get_requestor_id(u16 bdf); +int get_dma_requestor_id(u16 bdf); void amd_iommu_add_dev_table_entry( u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass, u8 nmi_pass, u8 ext_int_pass, u8 init_pass); @@ -97,6 +97,7 @@ unsigned int amd_iommu_read_ioapic_from_ unsigned int apic, unsigned int reg); extern int ioapic_bdf[MAX_IO_APICS]; +extern void *shared_intremap_table; /* power management support */ void amd_iommu_resume(void); diff -r 8d6edc3d26d2 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/include/xen/iommu.h Tue Aug 16 11:57:01 2011 +0100 @@ -32,6 +32,7 @@ extern bool_t iommu_snoop, iommu_qinval, extern bool_t iommu_snoop, iommu_qinval, iommu_intremap; extern bool_t iommu_hap_pt_share; extern bool_t iommu_debug; +extern bool_t amd_iommu_perdev_intremap; extern struct rangeset *mmio_ro_ranges; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
George Dunlap
2011-Aug-16 11:48 UTC
Re: [Xen-devel] [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
Well, this *is* the correct fix to the original problem. It''s likely then that there''s either a bug in a BIOS somewhere, or a bug in the per-device intremap table code that was hidden by global intremap being the default. However, from a practical perspective, if Wei doesn''t have time to work on it, I may not for several weeks; during which time, I think reverting this patch to un-break testing for everyone else makes sense. This bug will likely be affecting our upcoming XenServer as well, so let me see if I can prioritize working on it. -George On Tue, Aug 16, 2011 at 12:09 PM, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:> 23733:fbf3768e5934 causes xen-unstable not to boot on several of the > xen.org AMD test systems. We get an endless series of these: > > (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0x00a0, fault address = 0xfdf8f10144 > > I have constructed the attached patch which reverts c/s 23733 > (adjusted for conflicts due to subsequent patches). With this > reversion Xen once more boots on these machines. > > 23733 has been in the tree for some time now, causing this breakage, > and has already been fingered by the automatic bisector and discussed > on xen-devel as the cause of boot failures. I think it is now time to > revert it pending a correct fix to the original problem. > > Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> > > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_acpi.c > --- a/xen/drivers/passthrough/amd/iommu_acpi.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Tue Aug 16 11:57:01 2011 +0100 > @@ -66,8 +66,15 @@ static void __init add_ivrs_mapping_entr > if (ivrs_mappings[alias_id].intremap_table == NULL ) > { > /* allocate per-device interrupt remapping table */ > - ivrs_mappings[alias_id].intremap_table > + if ( amd_iommu_perdev_intremap ) > + ivrs_mappings[alias_id].intremap_table > amd_iommu_alloc_intremap_table(); > + else > + { > + if ( shared_intremap_table == NULL ) > + shared_intremap_table = amd_iommu_alloc_intremap_table(); > + ivrs_mappings[alias_id].intremap_table = shared_intremap_table; > + } > } > /* assgin iommu hardware */ > ivrs_mappings[bdf].iommu = iommu; > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_init.c > --- a/xen/drivers/passthrough/amd/iommu_init.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 16 11:57:01 2011 +0100 > @@ -500,7 +500,7 @@ static void parse_event_log_entry(u32 en > /* Tell the device to stop DMAing; we can''t rely on the guest to > * control it for us. */ > for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > - if ( get_requestor_id(bdf) == device_id ) > + if ( get_dma_requestor_id(bdf) == device_id ) > { > cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf), > PCI_FUNC(bdf), PCI_COMMAND); > @@ -772,7 +772,8 @@ static int __init init_ivrs_mapping(void > ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED; > ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED; > > - spin_lock_init(&ivrs_mappings[bdf].intremap_lock); > + if ( amd_iommu_perdev_intremap ) > + spin_lock_init(&ivrs_mappings[bdf].intremap_lock); > } > return 0; > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_intr.c > --- a/xen/drivers/passthrough/amd/iommu_intr.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_intr.c Tue Aug 16 11:57:01 2011 +0100 > @@ -28,10 +28,20 @@ > #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH) > > int ioapic_bdf[MAX_IO_APICS]; > +void *shared_intremap_table; > +static DEFINE_SPINLOCK(shared_intremap_lock); > > static spinlock_t* get_intremap_lock(int req_id) > { > - return &ivrs_mappings[req_id].intremap_lock; > + return (amd_iommu_perdev_intremap ? > + &ivrs_mappings[req_id].intremap_lock: > + &shared_intremap_lock); > +} > + > +static int get_intremap_requestor_id(int bdf) > +{ > + ASSERT( bdf < ivrs_bdf_entries ); > + return ivrs_mappings[bdf].dte_requestor_id; > } > > static int get_intremap_offset(u8 vector, u8 dm) > @@ -115,7 +125,7 @@ static void update_intremap_entry_from_i > spinlock_t *lock; > int offset; > > - req_id = get_requestor_id(bdf); > + req_id = get_intremap_requestor_id(bdf); > lock = get_intremap_lock(req_id); > > delivery_mode = rte->delivery_mode; > @@ -173,7 +183,7 @@ int __init amd_iommu_setup_ioapic_remapp > continue; > } > > - req_id = get_requestor_id(bdf); > + req_id = get_intremap_requestor_id(bdf); > lock = get_intremap_lock(req_id); > > delivery_mode = rte.delivery_mode; > @@ -273,13 +283,14 @@ static void update_intremap_entry_from_m > { > unsigned long flags; > u32* entry; > - u16 bdf, req_id; > + u16 bdf, req_id, alias_id; > u8 delivery_mode, dest, vector, dest_mode; > spinlock_t *lock; > int offset; > > bdf = (pdev->bus << 8) | pdev->devfn; > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > + alias_id = get_intremap_requestor_id(bdf); > > if ( msg == NULL ) > { > @@ -288,6 +299,14 @@ static void update_intremap_entry_from_m > free_intremap_entry(req_id, msi_desc->remap_index); > spin_unlock_irqrestore(lock, flags); > > + if ( ( req_id != alias_id ) && > + ivrs_mappings[alias_id].intremap_table != NULL ) > + { > + lock = get_intremap_lock(alias_id); > + spin_lock_irqsave(lock, flags); > + free_intremap_entry(alias_id, msi_desc->remap_index); > + spin_unlock_irqrestore(lock, flags); > + } > goto done; > } > > @@ -305,11 +324,30 @@ static void update_intremap_entry_from_m > update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); > spin_unlock_irqrestore(lock, flags); > > + /* > + * In some special cases, a pci-e device(e.g SATA controller in IDE mode) > + * will use alias id to index interrupt remapping table. > + * We have to setup a secondary interrupt remapping entry to satisfy those > + * devices. > + */ > + > + lock = get_intremap_lock(alias_id); > + if ( ( req_id != alias_id ) && > + ivrs_mappings[alias_id].intremap_table != NULL ) > + { > + spin_lock_irqsave(lock, flags); > + entry = (u32*)get_intremap_entry(alias_id, offset); > + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); > + spin_unlock_irqrestore(lock, flags); > + } > + > done: > if ( iommu->enabled ) > { > spin_lock_irqsave(&iommu->lock, flags); > invalidate_interrupt_table(iommu, req_id); > + if ( alias_id != req_id ) > + invalidate_interrupt_table(iommu, alias_id); > flush_command_buffer(iommu); > spin_unlock_irqrestore(&iommu->lock, flags); > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_map.c > --- a/xen/drivers/passthrough/amd/iommu_map.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Aug 16 11:57:01 2011 +0100 > @@ -543,7 +543,7 @@ static int update_paging_mode(struct dom > for_each_pdev( d, pdev ) > { > bdf = (pdev->bus << 8) | pdev->devfn; > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > iommu = find_iommu_for_device(bdf); > if ( !iommu ) > { > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/pci_amd_iommu.c > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 16 11:57:01 2011 +0100 > @@ -34,10 +34,25 @@ struct amd_iommu *find_iommu_for_device( > return ivrs_mappings[bdf].iommu; > } > > -int get_requestor_id(u16 bdf) > +/* > + * Some devices will use alias id and original device id to index interrupt > + * table and I/O page table respectively. Such devices will have > + * both alias entry and select entry in IVRS structure. > + * > + * Return original device id, if device has valid interrupt remapping > + * table setup for both select entry and alias entry. > + */ > +int get_dma_requestor_id(u16 bdf) > { > + int req_id; > + > BUG_ON ( bdf >= ivrs_bdf_entries ); > - return ivrs_mappings[bdf].dte_requestor_id; > + req_id = ivrs_mappings[bdf].dte_requestor_id; > + if ( (ivrs_mappings[bdf].intremap_table != NULL) && > + (ivrs_mappings[req_id].intremap_table != NULL) ) > + req_id = bdf; > + > + return req_id; > } > > static int is_translation_valid(u32 *entry) > @@ -79,7 +94,7 @@ static void amd_iommu_setup_domain_devic > valid = 0; > > /* get device-table entry */ > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); > > spin_lock_irqsave(&iommu->lock, flags); > @@ -252,7 +267,7 @@ static void amd_iommu_disable_domain_dev > int req_id; > > BUG_ON ( iommu->dev_table.buffer == NULL ); > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); > > spin_lock_irqsave(&iommu->lock, flags); > @@ -314,7 +329,7 @@ static int amd_iommu_assign_device(struc > static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) > { > int bdf = (bus << 8) | devfn; > - int req_id = get_requestor_id(bdf); > + int req_id = get_dma_requestor_id(bdf); > > if ( ivrs_mappings[req_id].unity_map_enable ) > { > @@ -433,7 +448,7 @@ static int amd_iommu_group_id(u8 bus, u8 > int rt; > int bdf = (bus << 8) | devfn; > rt = ( bdf < ivrs_bdf_entries ) ? > - get_requestor_id(bdf) : > + get_dma_requestor_id(bdf) : > bdf; > return rt; > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/iommu.c > --- a/xen/drivers/passthrough/iommu.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/iommu.c Tue Aug 16 11:57:01 2011 +0100 > @@ -50,6 +50,7 @@ bool_t __read_mostly iommu_hap_pt_share; > bool_t __read_mostly iommu_hap_pt_share; > bool_t __read_mostly iommu_debug; > bool_t __read_mostly iommu_amd_perdev_vector_map = 1; > +bool_t __read_mostly amd_iommu_perdev_intremap; > > static void __init parse_iommu_param(char *s) > { > @@ -76,6 +77,8 @@ static void __init parse_iommu_param(cha > iommu_intremap = 0; > else if ( !strcmp(s, "debug") ) > iommu_debug = 1; > + else if ( !strcmp(s, "amd-iommu-perdev-intremap") ) > + amd_iommu_perdev_intremap = 1; > else if ( !strcmp(s, "dom0-passthrough") ) > iommu_passthrough = 1; > else if ( !strcmp(s, "dom0-strict") ) > diff -r 8d6edc3d26d2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h > --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Aug 16 11:57:01 2011 +0100 > @@ -65,7 +65,7 @@ void amd_iommu_share_p2m(struct domain * > void amd_iommu_share_p2m(struct domain *d); > > /* device table functions */ > -int get_requestor_id(u16 bdf); > +int get_dma_requestor_id(u16 bdf); > void amd_iommu_add_dev_table_entry( > u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass, > u8 nmi_pass, u8 ext_int_pass, u8 init_pass); > @@ -97,6 +97,7 @@ unsigned int amd_iommu_read_ioapic_from_ > unsigned int apic, unsigned int reg); > > extern int ioapic_bdf[MAX_IO_APICS]; > +extern void *shared_intremap_table; > > /* power management support */ > void amd_iommu_resume(void); > diff -r 8d6edc3d26d2 xen/include/xen/iommu.h > --- a/xen/include/xen/iommu.h Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/include/xen/iommu.h Tue Aug 16 11:57:01 2011 +0100 > @@ -32,6 +32,7 @@ extern bool_t iommu_snoop, iommu_qinval, > extern bool_t iommu_snoop, iommu_qinval, iommu_intremap; > extern bool_t iommu_hap_pt_share; > extern bool_t iommu_debug; > +extern bool_t amd_iommu_perdev_intremap; > > extern struct rangeset *mmio_ro_ranges; > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Jackson
2011-Aug-16 12:30 UTC
Re: [Xen-devel] [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
George Dunlap writes ("Re: [Xen-devel] [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ...""):> Well, this *is* the correct fix to the original problem. It''s likely > then that there''s either a bug in a BIOS somewhere, or a bug in the > per-device intremap table code that was hidden by global intremap > being the default.Right.> However, from a practical perspective, if Wei doesn''t have time to > work on it, I may not for several weeks; during which time, I think > reverting this patch to un-break testing for everyone else makes > sense.Right, I think so.> This bug will likely be affecting our upcoming XenServer as well, so > let me see if I can prioritize working on it.Thank you. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei Wang2
2011-Aug-16 12:42 UTC
[Xen-devel] Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
Ian, On you testing system is it possible to disable IDE combined mode for SATA devices? device id 0xa0 is very likely the id SATA wants to sent interrupt messages to iommu in IDE mode. Enable AHCI mode or IDE native could fix it. Wei On Tuesday 16 August 2011 13:09:36 Ian Jackson wrote:> 23733:fbf3768e5934 causes xen-unstable not to boot on several of the > xen.org AMD test systems. We get an endless series of these: > > (XEN) AMD-Vi: IO_PAGE_FAULT: domain = 0, device id = 0x00a0, fault > address = 0xfdf8f10144 > > I have constructed the attached patch which reverts c/s 23733 > (adjusted for conflicts due to subsequent patches). With this > reversion Xen once more boots on these machines. > > 23733 has been in the tree for some time now, causing this breakage, > and has already been fingered by the automatic bisector and discussed > on xen-devel as the cause of boot failures. I think it is now time to > revert it pending a correct fix to the original problem. > > Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> > > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_acpi.c > --- a/xen/drivers/passthrough/amd/iommu_acpi.c Sat Aug 13 10:14:58 2011 > +0100 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Tue Aug 16 11:57:01 > 2011 +0100 @@ -66,8 +66,15 @@ static void __init add_ivrs_mapping_entr > if (ivrs_mappings[alias_id].intremap_table == NULL ) > { > /* allocate per-device interrupt remapping table */ > - ivrs_mappings[alias_id].intremap_table > + if ( amd_iommu_perdev_intremap ) > + ivrs_mappings[alias_id].intremap_table > amd_iommu_alloc_intremap_table(); > + else > + { > + if ( shared_intremap_table == NULL ) > + shared_intremap_table = amd_iommu_alloc_intremap_table(); > + ivrs_mappings[alias_id].intremap_table > shared_intremap_table; + } > } > /* assgin iommu hardware */ > ivrs_mappings[bdf].iommu = iommu; > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_init.c > --- a/xen/drivers/passthrough/amd/iommu_init.c Sat Aug 13 10:14:58 2011 > +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 16 11:57:01 > 2011 +0100 @@ -500,7 +500,7 @@ static void parse_event_log_entry(u32 en > /* Tell the device to stop DMAing; we can''t rely on the guest to > * control it for us. */ > for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) > - if ( get_requestor_id(bdf) == device_id ) > + if ( get_dma_requestor_id(bdf) == device_id ) > { > cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf), > PCI_FUNC(bdf), PCI_COMMAND); > @@ -772,7 +772,8 @@ static int __init init_ivrs_mapping(void > ivrs_mappings[bdf].dte_ext_int_pass = IOMMU_CONTROL_DISABLED; > ivrs_mappings[bdf].dte_init_pass = IOMMU_CONTROL_DISABLED; > > - spin_lock_init(&ivrs_mappings[bdf].intremap_lock); > + if ( amd_iommu_perdev_intremap ) > + spin_lock_init(&ivrs_mappings[bdf].intremap_lock); > } > return 0; > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_intr.c > --- a/xen/drivers/passthrough/amd/iommu_intr.c Sat Aug 13 10:14:58 2011 > +0100 +++ b/xen/drivers/passthrough/amd/iommu_intr.c Tue Aug 16 11:57:01 > 2011 +0100 @@ -28,10 +28,20 @@ > #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH) > > int ioapic_bdf[MAX_IO_APICS]; > +void *shared_intremap_table; > +static DEFINE_SPINLOCK(shared_intremap_lock); > > static spinlock_t* get_intremap_lock(int req_id) > { > - return &ivrs_mappings[req_id].intremap_lock; > + return (amd_iommu_perdev_intremap ? > + &ivrs_mappings[req_id].intremap_lock: > + &shared_intremap_lock); > +} > + > +static int get_intremap_requestor_id(int bdf) > +{ > + ASSERT( bdf < ivrs_bdf_entries ); > + return ivrs_mappings[bdf].dte_requestor_id; > } > > static int get_intremap_offset(u8 vector, u8 dm) > @@ -115,7 +125,7 @@ static void update_intremap_entry_from_i > spinlock_t *lock; > int offset; > > - req_id = get_requestor_id(bdf); > + req_id = get_intremap_requestor_id(bdf); > lock = get_intremap_lock(req_id); > > delivery_mode = rte->delivery_mode; > @@ -173,7 +183,7 @@ int __init amd_iommu_setup_ioapic_remapp > continue; > } > > - req_id = get_requestor_id(bdf); > + req_id = get_intremap_requestor_id(bdf); > lock = get_intremap_lock(req_id); > > delivery_mode = rte.delivery_mode; > @@ -273,13 +283,14 @@ static void update_intremap_entry_from_m > { > unsigned long flags; > u32* entry; > - u16 bdf, req_id; > + u16 bdf, req_id, alias_id; > u8 delivery_mode, dest, vector, dest_mode; > spinlock_t *lock; > int offset; > > bdf = (pdev->bus << 8) | pdev->devfn; > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > + alias_id = get_intremap_requestor_id(bdf); > > if ( msg == NULL ) > { > @@ -288,6 +299,14 @@ static void update_intremap_entry_from_m > free_intremap_entry(req_id, msi_desc->remap_index); > spin_unlock_irqrestore(lock, flags); > > + if ( ( req_id != alias_id ) && > + ivrs_mappings[alias_id].intremap_table != NULL ) > + { > + lock = get_intremap_lock(alias_id); > + spin_lock_irqsave(lock, flags); > + free_intremap_entry(alias_id, msi_desc->remap_index); > + spin_unlock_irqrestore(lock, flags); > + } > goto done; > } > > @@ -305,11 +324,30 @@ static void update_intremap_entry_from_m > update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); > spin_unlock_irqrestore(lock, flags); > > + /* > + * In some special cases, a pci-e device(e.g SATA controller in IDE > mode) + * will use alias id to index interrupt remapping table. > + * We have to setup a secondary interrupt remapping entry to satisfy > those + * devices. > + */ > + > + lock = get_intremap_lock(alias_id); > + if ( ( req_id != alias_id ) && > + ivrs_mappings[alias_id].intremap_table != NULL ) > + { > + spin_lock_irqsave(lock, flags); > + entry = (u32*)get_intremap_entry(alias_id, offset); > + update_intremap_entry(entry, vector, delivery_mode, dest_mode, > dest); + spin_unlock_irqrestore(lock, flags); > + } > + > done: > if ( iommu->enabled ) > { > spin_lock_irqsave(&iommu->lock, flags); > invalidate_interrupt_table(iommu, req_id); > + if ( alias_id != req_id ) > + invalidate_interrupt_table(iommu, alias_id); > flush_command_buffer(iommu); > spin_unlock_irqrestore(&iommu->lock, flags); > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/iommu_map.c > --- a/xen/drivers/passthrough/amd/iommu_map.c Sat Aug 13 10:14:58 2011 > +0100 +++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Aug 16 11:57:01 > 2011 +0100 @@ -543,7 +543,7 @@ static int update_paging_mode(struct dom > for_each_pdev( d, pdev ) > { > bdf = (pdev->bus << 8) | pdev->devfn; > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > iommu = find_iommu_for_device(bdf); > if ( !iommu ) > { > diff -r 8d6edc3d26d2 xen/drivers/passthrough/amd/pci_amd_iommu.c > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Sat Aug 13 10:14:58 > 2011 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug > 16 11:57:01 2011 +0100 @@ -34,10 +34,25 @@ struct amd_iommu > *find_iommu_for_device( > return ivrs_mappings[bdf].iommu; > } > > -int get_requestor_id(u16 bdf) > +/* > + * Some devices will use alias id and original device id to index > interrupt + * table and I/O page table respectively. Such devices will have > + * both alias entry and select entry in IVRS structure. > + * > + * Return original device id, if device has valid interrupt remapping > + * table setup for both select entry and alias entry. > + */ > +int get_dma_requestor_id(u16 bdf) > { > + int req_id; > + > BUG_ON ( bdf >= ivrs_bdf_entries ); > - return ivrs_mappings[bdf].dte_requestor_id; > + req_id = ivrs_mappings[bdf].dte_requestor_id; > + if ( (ivrs_mappings[bdf].intremap_table != NULL) && > + (ivrs_mappings[req_id].intremap_table != NULL) ) > + req_id = bdf; > + > + return req_id; > } > > static int is_translation_valid(u32 *entry) > @@ -79,7 +94,7 @@ static void amd_iommu_setup_domain_devic > valid = 0; > > /* get device-table entry */ > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); > > spin_lock_irqsave(&iommu->lock, flags); > @@ -252,7 +267,7 @@ static void amd_iommu_disable_domain_dev > int req_id; > > BUG_ON ( iommu->dev_table.buffer == NULL ); > - req_id = get_requestor_id(bdf); > + req_id = get_dma_requestor_id(bdf); > dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); > > spin_lock_irqsave(&iommu->lock, flags); > @@ -314,7 +329,7 @@ static int amd_iommu_assign_device(struc > static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) > { > int bdf = (bus << 8) | devfn; > - int req_id = get_requestor_id(bdf); > + int req_id = get_dma_requestor_id(bdf); > > if ( ivrs_mappings[req_id].unity_map_enable ) > { > @@ -433,7 +448,7 @@ static int amd_iommu_group_id(u8 bus, u8 > int rt; > int bdf = (bus << 8) | devfn; > rt = ( bdf < ivrs_bdf_entries ) ? > - get_requestor_id(bdf) : > + get_dma_requestor_id(bdf) : > bdf; > return rt; > } > diff -r 8d6edc3d26d2 xen/drivers/passthrough/iommu.c > --- a/xen/drivers/passthrough/iommu.c Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/drivers/passthrough/iommu.c Tue Aug 16 11:57:01 2011 +0100 > @@ -50,6 +50,7 @@ bool_t __read_mostly iommu_hap_pt_share; > bool_t __read_mostly iommu_hap_pt_share; > bool_t __read_mostly iommu_debug; > bool_t __read_mostly iommu_amd_perdev_vector_map = 1; > +bool_t __read_mostly amd_iommu_perdev_intremap; > > static void __init parse_iommu_param(char *s) > { > @@ -76,6 +77,8 @@ static void __init parse_iommu_param(cha > iommu_intremap = 0; > else if ( !strcmp(s, "debug") ) > iommu_debug = 1; > + else if ( !strcmp(s, "amd-iommu-perdev-intremap") ) > + amd_iommu_perdev_intremap = 1; > else if ( !strcmp(s, "dom0-passthrough") ) > iommu_passthrough = 1; > else if ( !strcmp(s, "dom0-strict") ) > diff -r 8d6edc3d26d2 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h > --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Sat Aug 13 10:14:58 > 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Aug > 16 11:57:01 2011 +0100 @@ -65,7 +65,7 @@ void amd_iommu_share_p2m(struct > domain * > void amd_iommu_share_p2m(struct domain *d); > > /* device table functions */ > -int get_requestor_id(u16 bdf); > +int get_dma_requestor_id(u16 bdf); > void amd_iommu_add_dev_table_entry( > u32 *dte, u8 sys_mgt, u8 dev_ex, u8 lint1_pass, u8 lint0_pass, > u8 nmi_pass, u8 ext_int_pass, u8 init_pass); > @@ -97,6 +97,7 @@ unsigned int amd_iommu_read_ioapic_from_ > unsigned int apic, unsigned int reg); > > extern int ioapic_bdf[MAX_IO_APICS]; > +extern void *shared_intremap_table; > > /* power management support */ > void amd_iommu_resume(void); > diff -r 8d6edc3d26d2 xen/include/xen/iommu.h > --- a/xen/include/xen/iommu.h Sat Aug 13 10:14:58 2011 +0100 > +++ b/xen/include/xen/iommu.h Tue Aug 16 11:57:01 2011 +0100 > @@ -32,6 +32,7 @@ extern bool_t iommu_snoop, iommu_qinval, > extern bool_t iommu_snoop, iommu_qinval, iommu_intremap; > extern bool_t iommu_hap_pt_share; > extern bool_t iommu_debug; > +extern bool_t amd_iommu_perdev_intremap; > > extern struct rangeset *mmio_ro_ranges;_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Jackson
2011-Aug-16 12:46 UTC
[Xen-devel] Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
Wei Wang2 writes ("Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ...""):> On you testing system is it possible to disable IDE combined mode for SATA > devices? device id 0xa0 is very likely the id SATA wants to sent interrupt > messages to iommu in IDE mode. Enable AHCI mode or IDE native could fix it.Yes, I could try fiddling with the BIOS settings as a test. But all three of my AMD test machines have this problem and I don''t think an instruction to all users of AMD machines, that they have to do things to their BIOS, is really going to fly. I will try this and report back. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Jackson
2011-Aug-16 12:59 UTC
[Xen-devel] Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
I wrote:> Wei Wang2 writes ("Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ...""): > > On you testing system is it possible to disable IDE combined mode for SATA > > devices? device id 0xa0 is very likely the id SATA wants to sent interrupt > > messages to iommu in IDE mode. Enable AHCI mode or IDE native could fix it. > > Yes, I could try fiddling with the BIOS settings as a test. But all > three of my AMD test machines have this problem and I don''t think an > instruction to all users of AMD machines, that they have to do things > to their BIOS, is really going to fly. > > I will try this and report back.I went into the BIOS and changed "SATA IDE Combined Mode" from "Enabled" to "Disabled". This did not fix the problem. Screenshot below of the relevant page from the BIOS setup. Ian. Advanced ******************************************************************************** * IDE Configuration * DISABLED: disables the * * *************************************************** * integrated IDE * * OnBoard PCI IDE Controller [Enabled] * Controller. * * OnChip SATA Channel [Enabled] * ENABLED: enables the * * OnChip SATA Type [Native IDE] * integrated IDE * * SATA IDE Combined Mode [Disabled] * Controller. * * * * * * Primary IDE Master : [Not Detected] * * * * Primary IDE Slave : [Not Detected] * * * * Secondary IDE Master : [Not Detected] * * * * Secondary IDE Slave : [Not Detected] * * * * Third IDE Master : [Hard Disk] * * * * Third IDE Slave : [Not Detected] * ****:Move * * * Fourth IDE Master : [Not Detected] * Enter:Select * * * Fourth IDE Slave : [Not Detected] * +/-/:Value * * * F10:Save * * IDE Detect Time Out (Sec) [35] * ESC:Exit * * * F1:General Help * * * F8:Fail-Safe Defaults* * * F9:Optimized Defaults* ******************************************************************************** v02.68 (C)Copyright 1985-2009, American Megatrends, Inc. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei Wang2
2011-Aug-16 13:36 UTC
[Xen-devel] Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU: remove global ..."
Ian, Could you attach a full boot log for from the fault system? It looks like some IVRS entries are wrong and prevent device 0xa0 from using correct device id to find its interrupt remapping table. A global table could hide this issue since every device ends up to the same interrupt table even using wrong device id. Meanwhile, I am OK with reversing 23733 to get system booting. Thanks, Wei On Tuesday 16 August 2011 14:59:26 Ian Jackson wrote:> I wrote: > > Wei Wang2 writes ("Re: [PATCH] Revert 23733:fbf3768e5934 "AMD IOMMU:remove global ...""):> > > On you testing system is it possible to disable IDE combined mode for > > > SATA devices? device id 0xa0 is very likely the id SATA wants to sent > > > interrupt messages to iommu in IDE mode. Enable AHCI mode or IDE native > > > could fix it. > > > > Yes, I could try fiddling with the BIOS settings as a test. But all > > three of my AMD test machines have this problem and I don''t think an > > instruction to all users of AMD machines, that they have to do things > > to their BIOS, is really going to fly. > > > > I will try this and report back. > > I went into the BIOS and changed "SATA IDE Combined Mode" from > "Enabled" to "Disabled". This did not fix the problem. Screenshot > below of the relevant page from the BIOS setup. > > Ian. > > Advanced > *************************************************************************** >***** * IDE Configuration * DISABLED: > disables the * * *************************************************** * > integrated IDE * * OnBoard PCI IDE Controller [Enabled] > * Controller. * * OnChip SATA Channel [Enabled] > * ENABLED: enables the * * OnChip SATA Type [Native > IDE] * integrated IDE * * SATA IDE Combined Mode > [Disabled] * Controller. * * > * * * * Primary IDE Master > : [Not Detected] * * * * Primary IDE > Slave : [Not Detected] * * * * > Secondary IDE Master : [Not Detected] * > * * * Secondary IDE Slave : [Not Detected] * > * * * Third IDE Master : [Hard Disk] * > * * * Third IDE Slave : [Not Detected] * > ****:Move * * * Fourth IDE Master : [Not Detected] > * Enter:Select * * * Fourth IDE Slave : [Not > Detected] * +/-/:Value * * > * F10:Save * * IDE Detect Time Out (Sec) > [35] * ESC:Exit * * > * F1:General Help * * > * F8:Fail-Safe Defaults* * > * F9:Optimized Defaults* > *************************************************************************** >***** v02.68 (C)Copyright 1985-2009, American Megatrends, Inc._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel