Fix vmce addr/misc wrmsr bug This patch fix a bug related to wrmsr vmce bank addr/misc registers, since they are not read-only. Intel SDM recommanded os mce driver clear bank status/addr/misc. So guest mce driver may clear addr and misc registers. Under such case, old vmce wrmsr logic would generate a #GP fault at guest mce context, and make guest crash. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800 +++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800 @@ -209,6 +209,14 @@ struct domain_mca_msrs *vmce = dom_vmce(v->domain); struct bank_entry *entry = NULL; + /* Give the first entry of the list, it corresponds to current + * vMCE# injection. When vMCE# is finished processing by the + * the guest, this node will be deleted. + * Only error bank is written. Non-error banks simply return. + */ + if ( !list_empty(&vmce->impact_header) ) + entry = list_entry(vmce->impact_header.next, struct bank_entry, list); + switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { case MSR_IA32_MC0_CTL: @@ -216,32 +224,28 @@ vmce->mci_ctl[bank] = val; break; case MSR_IA32_MC0_STATUS: - /* Give the first entry of the list, it corresponds to current - * vMCE# injection. When vMCE# is finished processing by the - * the guest, this node will be deleted. - * Only error bank is written. Non-error banks simply return. - */ - if ( !list_empty(&vmce->impact_header) ) + if ( entry && (entry->bank == bank) ) { - entry = list_entry(vmce->impact_header.next, - struct bank_entry, list); - if ( entry->bank == bank ) - entry->mci_status = val; + entry->mci_status = val; mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", - bank, val); + "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", bank, val); } - else - mce_printk(MCE_VERBOSE, - "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val); break; case MSR_IA32_MC0_ADDR: - mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); - ret = -1; + if ( entry && (entry->bank == bank) ) + { + entry->mci_addr = val; + mce_printk(MCE_VERBOSE, + "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val); + } break; case MSR_IA32_MC0_MISC: - mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); - ret = -1; + if ( entry && (entry->bank == bank) ) + { + entry->mci_misc = val; + mce_printk(MCE_VERBOSE, + "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val); + } break; default: switch ( boot_cpu_data.x86_vendor ) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> On 06.05.12 at 14:13, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Fix vmce addr/misc wrmsr bug > > This patch fix a bug related to wrmsr vmce bank addr/misc > registers, since they are not read-only. > > Intel SDM recommanded os mce driver clear bank status/addr/misc. > So guest mce driver may clear addr and misc registers. > Under such case, old vmce wrmsr logic would generate > a #GP fault at guest mce context, and make guest crash. > > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > > diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c > --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800 > +++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800 > @@ -209,6 +209,14 @@ > struct domain_mca_msrs *vmce = dom_vmce(v->domain); > struct bank_entry *entry = NULL; > > + /* Give the first entry of the list, it corresponds to current > + * vMCE# injection. When vMCE# is finished processing by the > + * the guest, this node will be deleted. > + * Only error bank is written. Non-error banks simply return. > + */ > + if ( !list_empty(&vmce->impact_header) ) > + entry = list_entry(vmce->impact_header.next, struct bank_entry, list); > + > switch ( msr & (MSR_IA32_MC0_CTL | 3) ) > { > case MSR_IA32_MC0_CTL: > @@ -216,32 +224,28 @@ > vmce->mci_ctl[bank] = val; > break; > case MSR_IA32_MC0_STATUS: > - /* Give the first entry of the list, it corresponds to current > - * vMCE# injection. When vMCE# is finished processing by the > - * the guest, this node will be deleted. > - * Only error bank is written. Non-error banks simply return. > - */ > - if ( !list_empty(&vmce->impact_header) ) > + if ( entry && (entry->bank == bank) ) > { > - entry = list_entry(vmce->impact_header.next, > - struct bank_entry, list); > - if ( entry->bank == bank ) > - entry->mci_status = val; > + entry->mci_status = val; > mce_printk(MCE_VERBOSE, > - "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", > - bank, val); > + "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", bank, val); > } > - else > - mce_printk(MCE_VERBOSE, > - "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);Why do you delete this printk()? It''ll be impossible to track down ignored guest writes, should those cause a problem in the guest.> break; > case MSR_IA32_MC0_ADDR: > - mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank); > - ret = -1; > + if ( entry && (entry->bank == bank) ) > + { > + entry->mci_addr = val; > + mce_printk(MCE_VERBOSE, > + "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val); > + }The patch description talks only about clearing the register, yet you silently accept all writes here. Would real hardware accept writes of other than zero? Further, just as above, ignore writes won''t be possible to track down.> break; > case MSR_IA32_MC0_MISC: > - mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank); > - ret = -1; > + if ( entry && (entry->bank == bank) ) > + { > + entry->mci_misc = val; > + mce_printk(MCE_VERBOSE, > + "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val); > + }Same here in both respects. Jan> break; > default: > switch ( boot_cpu_data.x86_vendor )
Jan Beulich wrote:>>>> On 06.05.12 at 14:13, "Liu, Jinsong" <jinsong.liu@intel.com> >>>> wrote: Fix vmce addr/misc wrmsr bug >> >> This patch fix a bug related to wrmsr vmce bank addr/misc >> registers, since they are not read-only. >> >> Intel SDM recommanded os mce driver clear bank status/addr/misc. >> So guest mce driver may clear addr and misc registers. >> Under such case, old vmce wrmsr logic would generate >> a #GP fault at guest mce context, and make guest crash. >> >> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> >> >> diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c >> --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800 >> @@ -209,6 +209,14 @@ struct domain_mca_msrs *vmce >> dom_vmce(v->domain); struct bank_entry *entry = NULL; >> >> + /* Give the first entry of the list, it corresponds to current >> + * vMCE# injection. When vMCE# is finished processing by the >> + * the guest, this node will be deleted. >> + * Only error bank is written. Non-error banks simply return. + >> */ + if ( !list_empty(&vmce->impact_header) ) >> + entry = list_entry(vmce->impact_header.next, struct >> bank_entry, list); + switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >> { >> case MSR_IA32_MC0_CTL: >> @@ -216,32 +224,28 @@ >> vmce->mci_ctl[bank] = val; >> break; >> case MSR_IA32_MC0_STATUS: >> - /* Give the first entry of the list, it corresponds to >> current >> - * vMCE# injection. When vMCE# is finished processing by the >> - * the guest, this node will be deleted. >> - * Only error bank is written. Non-error banks simply >> return. >> - */ >> - if ( !list_empty(&vmce->impact_header) ) >> + if ( entry && (entry->bank == bank) ) >> { >> - entry = list_entry(vmce->impact_header.next, >> - struct bank_entry, list); >> - if ( entry->bank == bank ) >> - entry->mci_status = val; >> + entry->mci_status = val; >> mce_printk(MCE_VERBOSE, >> - "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >> - bank, val); >> + "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", >> bank, val); } >> - else >> - mce_printk(MCE_VERBOSE, >> - "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, >> val); > > Why do you delete this printk()? It''ll be impossible to track down > ignored guest writes, should those cause a problem in the guest.OK, will add printk.> >> break; >> case MSR_IA32_MC0_ADDR: >> - mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", >> bank); >> - ret = -1; >> + if ( entry && (entry->bank == bank) ) >> + { >> + entry->mci_addr = val; >> + mce_printk(MCE_VERBOSE, >> + "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", >> bank, val); + } > > The patch description talks only about clearing the register, yet you > silently accept all writes here. Would real hardware accept writes of > other than zero?Yes, except write all 1''s would cause GP. Will add code to handle writing all 1''s case.> > Further, just as above, ignore writes won''t be possible to track down. > >> break; >> case MSR_IA32_MC0_MISC: >> - mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", >> bank); >> - ret = -1; >> + if ( entry && (entry->bank == bank) ) >> + { >> + entry->mci_misc = val; >> + mce_printk(MCE_VERBOSE, >> + "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", >> bank, val); + } > > Same here in both respects.Same as above. Thanks, Jinsong