Andrew Cooper
2013-Jun-03  20:44 UTC
[PATCH] AMD/IO-APIC: Use old IO-APIC ack method if AMD 813{1, 2} PCI-X tunnel is present
References at time of patch:
AMD 8132 PCI-X HyperTransport Tunnel
http://support.amd.com/us/ChipsetMotherboard_TechDocs/30801.pdf
AMD 8131 PCI-X HyperTransport Tunnel
http://support.amd.com/us/ChipsetMotherboard_TechDocs/26310_AMD-8131_HyperTransport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf
The IO-APICs on the above two PCI-X HyperTransport Tunnels will issue illegal
HT packets if an interrupt gets masked while active.
This is sadly exactly what the "new" IO-APIC ack mode does.  The
"old" ack
mode does however avoid this bad behaviour, and stabilises an affected system
belonging to a customer.
Original patch:
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Forward ported and tweaked for upstream:
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff -r 2d37d2d652a8 -r d9491f594d14 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -2016,8 +2016,70 @@ static void __init ioapic_pm_state_alloc
     BUG_ON(ioapic_pm_state == NULL);
 }
 
+/* AMD 8131 and 8132 PCI-X HyperTransport Tunnel IO-APICs will issue illegal
+ * HyperTransport packets if an interrupt is masked while active.  This leads
+ * to system instability and crashes with no obvious cause.
+ *
+ * This is sadly the exact behaviour of the "new" IO-APIC ack mode,
but using
+ * "old" mode works around the issue.
+ */
+void __init amd813x_errata_quirks(void)
+{
+    unsigned i, found = 0;
+    uint32_t bus, dev, vendor_id;
+
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+        return;
+
+    /* Check for the affected IO-APIC version (0x11) to save scanning the PCI
+     * bus on most systems. */
+    for ( i = 0; i < nr_ioapics; ++i )
+    {
+        if ( mp_ioapics[i].mpc_apicver == 0x11 )
+        {
+            found = 1;
+            break;
+        }
+    }
+
+    if ( !found )
+        return;
+
+    for ( bus = 0; bus < 256; ++bus )
+    {
+        for ( dev = 0; dev < 32; ++dev )
+        {
+            /* IO-APIC is always Function 1.  Check for Multifunction. */
+            if ( !( pci_conf_read8(0, bus, dev, 0, PCI_HEADER_TYPE ) & 0x80
) )
+                continue;
+
+            vendor_id = pci_conf_read32(0, bus, dev, 1, PCI_VENDOR_ID);
+
+            /* 0x7451 is AMD-8131 PCI-X IO-APIC */
+            if ( vendor_id == 0x74511022 )
+            {
+                printk(XENLOG_WARNING "Found AMD 8131 PCI-X tunnel. "
+                       "Forcing IO-APIC ack method to
''old'' due to erratum #63\n");
+                ioapic_ack_new = 0;
+                return;
+            }
+            /* 0x7459 is AMD-8132 PCI-X IO-APIC */
+            else if ( vendor_id == 0x74591022 )
+            {
+                printk(XENLOG_WARNING "Found AMD 8132 PCI-X tunnel. "
+                       "Forcing IO-APIC ack method to
''old'' due to erratum #81\n");
+                ioapic_ack_new = 0;
+                return;
+            }
+
+        }
+    }
+}
+
 void __init setup_IO_APIC(void)
 {
+    amd813x_errata_quirks();
+
     enable_IO_APIC();
 
     if (acpi_ioapic)
Jan Beulich
2013-Jun-04  09:03 UTC
Re: [PATCH] AMD/IO-APIC: Use old IO-APIC ack method if AMD 813{1, 2} PCI-X tunnel is present
>>> On 03.06.13 at 22:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > References at time of patch: > > AMD 8132 PCI-X HyperTransport Tunnel > http://support.amd.com/us/ChipsetMotherboard_TechDocs/30801.pdf > > AMD 8131 PCI-X HyperTransport Tunnel > http://support.amd.com/us/ChipsetMotherboard_TechDocs/26310_AMD-8131_HyperTra > nsport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf > > The IO-APICs on the above two PCI-X HyperTransport Tunnels will issue illegal > HT packets if an interrupt gets masked while active. > > This is sadly exactly what the "new" IO-APIC ack mode does. The "old" ack > mode does however avoid this bad behaviour, and stabilises an affected > system belonging to a customer.Ugly. Can you tell whether any of these systems have more than a single IO-APIC? Because if they don''t, I''d have a much smaller patch (defaulting to "old" on single-IO-APIC systems) that we have been carrying for ages, but that wasn''t liked by Keir when submitted originally.> --- a/xen/arch/x86/io_apic.c > +++ b/xen/arch/x86/io_apic.c > @@ -2016,8 +2016,70 @@ static void __init ioapic_pm_state_alloc > BUG_ON(ioapic_pm_state == NULL); > } > > +/* AMD 8131 and 8132 PCI-X HyperTransport Tunnel IO-APICs will issue illegal > + * HyperTransport packets if an interrupt is masked while active. This leads > + * to system instability and crashes with no obvious cause. > + * > + * This is sadly the exact behaviour of the "new" IO-APIC ack mode, but using > + * "old" mode works around the issue. > + */ > +void __init amd813x_errata_quirks(void) > +{ > + unsigned i, found = 0; > + uint32_t bus, dev, vendor_id; > + > + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) > + return; > + > + /* Check for the affected IO-APIC version (0x11) to save scanning the PCI > + * bus on most systems. */ > + for ( i = 0; i < nr_ioapics; ++i ) > + { > + if ( mp_ioapics[i].mpc_apicver == 0x11 ) > + { > + found = 1; > + break; > + } > + } > + > + if ( !found ) > + return; > + > + for ( bus = 0; bus < 256; ++bus ) > + { > + for ( dev = 0; dev < 32; ++dev ) > + { > + /* IO-APIC is always Function 1. Check for Multifunction. */ > + if ( !( pci_conf_read8(0, bus, dev, 0, PCI_HEADER_TYPE ) & 0x80 ) ) > + continue; > + > + vendor_id = pci_conf_read32(0, bus, dev, 1, PCI_VENDOR_ID); > + > + /* 0x7451 is AMD-8131 PCI-X IO-APIC */ > + if ( vendor_id == 0x74511022 ) > + { > + printk(XENLOG_WARNING "Found AMD 8131 PCI-X tunnel. " > + "Forcing IO-APIC ack method to ''old'' due to erratum #63\n"); > + ioapic_ack_new = 0; > + return; > + } > + /* 0x7459 is AMD-8132 PCI-X IO-APIC */ > + else if ( vendor_id == 0x74591022 ) > + { > + printk(XENLOG_WARNING "Found AMD 8132 PCI-X tunnel. " > + "Forcing IO-APIC ack method to ''old'' due to erratum #81\n"); > + ioapic_ack_new = 0; > + return; > + } > + > + } > + } > +} > + > void __init setup_IO_APIC(void) > { > + amd813x_errata_quirks(); > +Just like said on another patch of yours recently - you shouldn''t override command line options. Jan
Andrew Cooper
2013-Jun-04  09:06 UTC
Re: [PATCH] AMD/IO-APIC: Use old IO-APIC ack method if AMD 813{1, 2} PCI-X tunnel is present
On 04/06/13 10:03, Jan Beulich wrote:>>>> On 03.06.13 at 22:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote: >> References at time of patch: >> >> AMD 8132 PCI-X HyperTransport Tunnel >> http://support.amd.com/us/ChipsetMotherboard_TechDocs/30801.pdf >> >> AMD 8131 PCI-X HyperTransport Tunnel >> http://support.amd.com/us/ChipsetMotherboard_TechDocs/26310_AMD-8131_HyperTra >> nsport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf >> >> The IO-APICs on the above two PCI-X HyperTransport Tunnels will issue illegal >> HT packets if an interrupt gets masked while active. >> >> This is sadly exactly what the "new" IO-APIC ack mode does. The "old" ack >> mode does however avoid this bad behaviour, and stabilises an affected >> system belonging to a customer. > Ugly. > > Can you tell whether any of these systems have more than a single > IO-APIC? Because if they don''t, I''d have a much smaller patch > (defaulting to "old" on single-IO-APIC systems) that we have been > carrying for ages, but that wasn''t liked by Keir when submitted > originally.The system which caused us the issue has 3 IO-APICs. Two of these tunnels and a southbridge one.> >> --- a/xen/arch/x86/io_apic.c >> +++ b/xen/arch/x86/io_apic.c >> @@ -2016,8 +2016,70 @@ static void __init ioapic_pm_state_alloc >> BUG_ON(ioapic_pm_state == NULL); >> } >> >> +/* AMD 8131 and 8132 PCI-X HyperTransport Tunnel IO-APICs will issue illegal >> + * HyperTransport packets if an interrupt is masked while active. This leads >> + * to system instability and crashes with no obvious cause. >> + * >> + * This is sadly the exact behaviour of the "new" IO-APIC ack mode, but using >> + * "old" mode works around the issue. >> + */ >> +void __init amd813x_errata_quirks(void) >> +{ >> + unsigned i, found = 0; >> + uint32_t bus, dev, vendor_id; >> + >> + if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ) >> + return; >> + >> + /* Check for the affected IO-APIC version (0x11) to save scanning the PCI >> + * bus on most systems. */ >> + for ( i = 0; i < nr_ioapics; ++i ) >> + { >> + if ( mp_ioapics[i].mpc_apicver == 0x11 ) >> + { >> + found = 1; >> + break; >> + } >> + } >> + >> + if ( !found ) >> + return; >> + >> + for ( bus = 0; bus < 256; ++bus ) >> + { >> + for ( dev = 0; dev < 32; ++dev ) >> + { >> + /* IO-APIC is always Function 1. Check for Multifunction. */ >> + if ( !( pci_conf_read8(0, bus, dev, 0, PCI_HEADER_TYPE ) & 0x80 ) ) >> + continue; >> + >> + vendor_id = pci_conf_read32(0, bus, dev, 1, PCI_VENDOR_ID); >> + >> + /* 0x7451 is AMD-8131 PCI-X IO-APIC */ >> + if ( vendor_id == 0x74511022 ) >> + { >> + printk(XENLOG_WARNING "Found AMD 8131 PCI-X tunnel. " >> + "Forcing IO-APIC ack method to ''old'' due to erratum #63\n"); >> + ioapic_ack_new = 0; >> + return; >> + } >> + /* 0x7459 is AMD-8132 PCI-X IO-APIC */ >> + else if ( vendor_id == 0x74591022 ) >> + { >> + printk(XENLOG_WARNING "Found AMD 8132 PCI-X tunnel. " >> + "Forcing IO-APIC ack method to ''old'' due to erratum #81\n"); >> + ioapic_ack_new = 0; >> + return; >> + } >> + >> + } >> + } >> +} >> + >> void __init setup_IO_APIC(void) >> { >> + amd813x_errata_quirks(); >> + > Just like said on another patch of yours recently - you shouldn''t > override command line options. > > Jan >OK - I will tweak it a little. ~Andrew