Reference: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-family-spec-update.pdf Disable x2apic on affected systems. This requires exposing apic_boot_mode outside of apic.c Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> diff -r be00287ccdf0 -r 89ba0b0192c4 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c +++ b/xen/arch/x86/apic.c @@ -78,7 +78,7 @@ boolean_param("x2apic", opt_x2apic); * Bootstrap processor local APIC boot mode - so we can undo our changes * to the APIC state. */ -static enum apic_mode apic_boot_mode = APIC_MODE_INVALID; +enum apic_mode apic_boot_mode = APIC_MODE_INVALID; bool_t __read_mostly x2apic_enabled = 0; bool_t __read_mostly directed_eoi_enabled = 0; diff -r be00287ccdf0 -r 89ba0b0192c4 xen/arch/x86/genapic/probe.c --- a/xen/arch/x86/genapic/probe.c +++ b/xen/arch/x86/genapic/probe.c @@ -56,12 +56,51 @@ static void __init genapic_apic_force(ch } custom_param("apic", genapic_apic_force); +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which + * affects Stepping C-1, but is reported fixed in Stepping C-2. + * + * This causes system instability when using x2apic and VT-d queued + * invalidation. The workaround is to disable x2apic and VT-d. + */ +static void __init smb_bt98_erratum(void) +{ + const struct cpuinfo_x86 *c = &boot_cpu_data; + + if (!(c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 6 && + c->x86_model == 0x2d && + c->x86_mask == 0x6)) + return; + + printk(KERN_WARNING + "Disabling x2apic due to Sandy-Bridge BT98 erratum\n"); + + clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability); + x2apic_enabled = 0; + + /* If the BIOS started us in x2apic mode, switch back to xapic. */ + if (apic_boot_mode == APIC_MODE_X2APIC) { + uint64_t msr; + + rdmsrl(MSR_IA32_APICBASE, msr); + msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD); + wrmsrl(MSR_IA32_APICBASE, msr); + msr |= MSR_IA32_APICBASE_ENABLE; + wrmsrl(MSR_IA32_APICBASE, msr); + + apic_boot_mode = APIC_MODE_XAPIC; + } +} + void __init generic_apic_probe(void) { int i, changed; record_boot_APIC_mode(); + /* Must be before check_x2apic_preenabled() */ + smb_bt98_erratum(); + check_x2apic_preenabled(); cmdline_apic = changed = (genapic != NULL); diff -r be00287ccdf0 -r 89ba0b0192c4 xen/include/asm-x86/apic.h --- a/xen/include/asm-x86/apic.h +++ b/xen/include/asm-x86/apic.h @@ -28,6 +28,7 @@ enum apic_mode { APIC_MODE_XAPIC, /* xAPIC mode - default upon chipset reset */ APIC_MODE_X2APIC /* x2APIC mode - common for large MP machines */ }; +extern enum apic_mode apic_boot_mode; extern u8 apic_verbosity; extern bool_t x2apic_enabled;
Reference: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-family-spec-update.pdf Disable the IOMMU on affected systems. Specifying iommu=no-intremap on the Xen command line will allow use of basic VT-d functionality without suffering the system instability, albeit with the security problems associated with disabling interrupt remapping. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> diff -r 89ba0b0192c4 -r 9c2aed177e25 xen/drivers/passthrough/vtd/quirks.c --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -267,6 +267,27 @@ static void __init tylersburg_intremap_q } } +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which + * affects Stepping C-1, but is reported fixed in Stepping C-2. + * + * This causes system instability when using x2apic and VT-d queued + * invalidation. The workaround is to disable x2apic and VT-d. + */ +static void __init snb_bt98_erratum(void) +{ + const struct cpuinfo_x86 *c = &boot_cpu_data; + + if ( !( c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 6 && + c->x86_model == 0x2d && + c->x86_mask == 0x6 ) ) + return; + + printk(XENLOG_WARNING VTDPREFIX + " Disabling IOMMU due to Sandy-Bridge BT98 erratum\n"); + iommu_enable = 0; +} + /* initialize platform identification flags */ void __init platform_quirks_init(void) { @@ -288,9 +309,12 @@ void __init platform_quirks_init(void) /* ioremap IGD MMIO+0x2000 page */ map_igd_reg(); - /* Tylersburg interrupt remap quirk */ + /* Interrupt remapping quirks */ if ( iommu_intremap ) + { tylersburg_intremap_quirk(); + snb_bt98_erratum(); + } } /*
Jan Beulich
2013-Apr-29 09:04 UTC
Re: [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum
>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which > + * affects Stepping C-1, but is reported fixed in Stepping C-2. > + * > + * This causes system instability when using x2apic and VT-d queued > + * invalidation. The workaround is to disable x2apic and VT-d. > + */ > +static void __init smb_bt98_erratum(void)snb_bt98_erratum() perhaps? And is this really limited to just one specific product line (usually the same microarchitecture gets used for multiple ones, and then the BT98 reference would become confusing)?> +{ > + const struct cpuinfo_x86 *c = &boot_cpu_data; > + > + if (!(c->x86_vendor == X86_VENDOR_INTEL && > + c->x86 == 6 && > + c->x86_model == 0x2d && > + c->x86_mask == 0x6)) > + return; > + > + printk(KERN_WARNING > + "Disabling x2apic due to Sandy-Bridge BT98 erratum\n"); > + > + clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability); > + x2apic_enabled = 0; > +Up here it''s fine.> + /* If the BIOS started us in x2apic mode, switch back to xapic. */ > + if (apic_boot_mode == APIC_MODE_X2APIC) { > + uint64_t msr; > + > + rdmsrl(MSR_IA32_APICBASE, msr); > + msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD); > + wrmsrl(MSR_IA32_APICBASE, msr); > + msr |= MSR_IA32_APICBASE_ENABLE; > + wrmsrl(MSR_IA32_APICBASE, msr); > + > + apic_boot_mode = APIC_MODE_XAPIC; > + }But how good are our chances that this won''t cause subsequent problems? The BIOS may e.g. have assigned APIC IDs wider than what xAPIC can deal with. I think we ought to consider denying to boot in that case altogether. How does Linux deal with the erratum?> +} > + > void __init generic_apic_probe(void) > { > int i, changed; > > record_boot_APIC_mode(); > > + /* Must be before check_x2apic_preenabled() */Indentation (in case you need to resubmit anyway).> + smb_bt98_erratum();Perhaps worth moving into check_x2apic_preenabled() instead, keeping x2apic related code as much as possible in x2apic.c?> + > check_x2apic_preenabled(); > cmdline_apic = changed = (genapic != NULL); >
Andrew Cooper
2013-Apr-29 10:25 UTC
Re: [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum
On 29/04/13 10:04, Jan Beulich wrote:>>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote: >> +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which >> + * affects Stepping C-1, but is reported fixed in Stepping C-2. >> + * >> + * This causes system instability when using x2apic and VT-d queued >> + * invalidation. The workaround is to disable x2apic and VT-d. >> + */ >> +static void __init smb_bt98_erratum(void) > snb_bt98_erratum() perhaps? And is this really limited to just one > specific product line (usually the same microarchitecture gets used > for multiple ones, and then the BT98 reference would become > confusing)?Oops - I did mean snb. However, I would request guidance from Intel as to what the best naming policy would be.> >> +{ >> + const struct cpuinfo_x86 *c = &boot_cpu_data; >> + >> + if (!(c->x86_vendor == X86_VENDOR_INTEL && >> + c->x86 == 6 && >> + c->x86_model == 0x2d && >> + c->x86_mask == 0x6)) >> + return; >> + >> + printk(KERN_WARNING >> + "Disabling x2apic due to Sandy-Bridge BT98 erratum\n"); >> + >> + clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability); >> + x2apic_enabled = 0; >> + > Up here it''s fine. > >> + /* If the BIOS started us in x2apic mode, switch back to xapic. */ >> + if (apic_boot_mode == APIC_MODE_X2APIC) { >> + uint64_t msr; >> + >> + rdmsrl(MSR_IA32_APICBASE, msr); >> + msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD); >> + wrmsrl(MSR_IA32_APICBASE, msr); >> + msr |= MSR_IA32_APICBASE_ENABLE; >> + wrmsrl(MSR_IA32_APICBASE, msr); >> + >> + apic_boot_mode = APIC_MODE_XAPIC; >> + } > But how good are our chances that this won''t cause subsequent > problems? The BIOS may e.g. have assigned APIC IDs wider than > what xAPIC can deal with. I think we ought to consider denying > to boot in that case altogether. How does Linux deal with the > erratum?I guess "Works on my affected silicon" is not a good enough indicator, although in that case it was booting in xapic mode not x2apic mode. A quick look over linux mainline suggests that there is no workaround present for this erratum. As for APIC IDs, the CPU IDs are fixed, and the IO-APIC id on the PCH is 4 bits wide. Unhelpfully, the SDM states that the BIOS should read all the LAPIC IDs, and if any single one is greater than 255, the BIOS must hand over to the OS in x2apic mode. The code in this patch runs before the APs are booted, so querying their LAPIC IDs is somewhat hard. Alternatively, doing a synchronised switch from x2apic to xapic once the APs are running is likely to also be problematic, but might be our only option.> >> +} >> + >> void __init generic_apic_probe(void) >> { >> int i, changed; >> >> record_boot_APIC_mode(); >> >> + /* Must be before check_x2apic_preenabled() */ > Indentation (in case you need to resubmit anyway). > >> + smb_bt98_erratum(); > Perhaps worth moving into check_x2apic_preenabled() instead, > keeping x2apic related code as much as possible in x2apic.c?Ok.> >> + >> check_x2apic_preenabled(); >> cmdline_apic = changed = (genapic != NULL); >> >
>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > @@ -288,9 +309,12 @@ void __init platform_quirks_init(void) > /* ioremap IGD MMIO+0x2000 page */ > map_igd_reg(); > > - /* Tylersburg interrupt remap quirk */ > + /* Interrupt remapping quirks */ > if ( iommu_intremap ) > + { > tylersburg_intremap_quirk(); > + snb_bt98_erratum(); > + }Considering the nature of the erratum, keying this off iommu_intremap seems wrong - iommu_qinval ought to be used instead - intel_vtd_setup() takes care to disable interrupt remapping when queued invalidation is not available, yet when you make the above conditional upon iommu_intremap, queued invalidation could still get enabled (and used for whatever else purposes, now or in the future). Jan
Andrew Cooper
2013-Apr-29 15:23 UTC
Re: [PATCH 2 of 2] x86/VT-d: Sandy-Bridge BT98 Erratum
On 29/04/13 16:13, Jan Beulich wrote:>>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote: >> @@ -288,9 +309,12 @@ void __init platform_quirks_init(void) >> /* ioremap IGD MMIO+0x2000 page */ >> map_igd_reg(); >> >> - /* Tylersburg interrupt remap quirk */ >> + /* Interrupt remapping quirks */ >> if ( iommu_intremap ) >> + { >> tylersburg_intremap_quirk(); >> + snb_bt98_erratum(); >> + } > Considering the nature of the erratum, keying this off > iommu_intremap seems wrong - iommu_qinval ought to be used > instead - intel_vtd_setup() takes care to disable interrupt > remapping when queued invalidation is not available, yet when > you make the above conditional upon iommu_intremap, queued > invalidation could still get enabled (and used for whatever else > purposes, now or in the future). > > Jan >Yes - I mentally had the erratum the wrong way around, as intremap is the visible impact of it. I shall respin once hearing back from Xiantao, especially regarding the first patch. ~Andrew