Jan, Considering our discussion is some confused, I sent this patch as RFC. Comments/suggestions could based on this patch. Thanks, Jinsong ====================Xen/MCE: adjust for future new vMCE model Recently we designed a new vMCE model, and would implement later. In order not to block live migration from current vMCE to future new vMCE, we do some middle work in this patch, basically it does minimal adjustment, including 1. remove MCG_CTL; 2. stick all 1''s to MCi_CTL; 3. set MCG_CTL_P=0 and 2 banks at MCG_CAP; Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Wed Jun 27 09:36:43 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Jul 06 09:34:31 2012 +0800 @@ -843,8 +843,6 @@ mctelem_init(sizeof(struct mc_info)); - vmce_init(c); - /* Turn on MCE now */ set_in_cr4(X86_CR4_MCE); diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Wed Jun 27 09:36:43 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Jul 06 09:34:31 2012 +0800 @@ -170,8 +170,6 @@ int inject_vmce(struct domain *d); int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global); -extern int vmce_init(struct cpuinfo_x86 *c); - static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) { if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/vmce.c --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Jun 27 09:36:43 2012 +0200 +++ b/xen/arch/x86/cpu/mcheck/vmce.c Fri Jul 06 09:34:31 2012 +0800 @@ -19,36 +19,33 @@ #include "mce.h" #include "x86_mca.h" +/* + * Emulalte 2 banks for guest + * Bank0: reserved for ''bank0 quirk'' occur at some very old processors: + * 1). Intel cpu whose family-model value < 06-1A; + * 2). AMD K7 + * Bank1: used to transfer error info to guest + */ +#define GUEST_BANK_NUM 2 + #define dom_vmce(x) ((x)->arch.vmca_msrs) static uint64_t __read_mostly g_mcg_cap; -/* Real value in physical CTL MSR */ -static uint64_t __read_mostly h_mcg_ctl; -static uint64_t *__read_mostly h_mci_ctrl; - int vmce_init_msr(struct domain *d) { dom_vmce(d) = xmalloc(struct domain_mca_msrs); if ( !dom_vmce(d) ) return -ENOMEM; - dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks); - if ( !dom_vmce(d)->mci_ctl ) - { - xfree(dom_vmce(d)); - return -ENOMEM; - } - memset(dom_vmce(d)->mci_ctl, ~0, - nr_mce_banks * sizeof(*dom_vmce(d)->mci_ctl)); - dom_vmce(d)->mcg_status = 0x0; - dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0; dom_vmce(d)->nr_injection = 0; INIT_LIST_HEAD(&dom_vmce(d)->impact_header); spin_lock_init(&dom_vmce(d)->lock); + g_mcg_cap = MCG_TES_P | MCG_SER_P | GUEST_BANK_NUM; + return 0; } @@ -56,7 +53,6 @@ { if ( !dom_vmce(d) ) return; - xfree(dom_vmce(d)->mci_ctl); xfree(dom_vmce(d)); dom_vmce(d) = NULL; } @@ -68,7 +64,7 @@ int vmce_restore_vcpu(struct vcpu *v, uint64_t caps) { - if ( caps & ~g_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P ) + if ( caps & ~g_mcg_cap & ~MCG_CAP_COUNT ) { dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities" " %#" PRIx64 " for d%d:v%u (supported: %#Lx)\n", @@ -93,9 +89,7 @@ switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { case MSR_IA32_MC0_CTL: - if ( bank < nr_mce_banks ) - *val = vmce->mci_ctl[bank] & - (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); + *val = ~0UL; mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", bank, *val); break; @@ -187,11 +181,9 @@ *val); break; case MSR_IA32_MCG_CTL: - /* Always 0 if no CTL support */ - if ( cur->arch.mcg_cap & MCG_CTL_P ) - *val = vmce->mcg_ctl & h_mcg_ctl; - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", - *val); + BUG_ON( cur->arch.mcg_cap & MCG_CTL_P ); + mce_printk(MCE_QUIET, "MCE: no MCG_CTL\n"); + ret = -1; break; default: ret = mce_bank_msr(cur, msr) ? bank_mce_rdmsr(cur, msr, val) : 0; @@ -220,8 +212,10 @@ switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { case MSR_IA32_MC0_CTL: - if ( bank < nr_mce_banks ) - vmce->mci_ctl[bank] = val; + /* + * if guest crazy clear any bit of MCi_CTL, + * treat it as not implement and ignore write change it. + */ break; case MSR_IA32_MC0_STATUS: if ( entry && (entry->bank == bank) ) @@ -305,7 +299,9 @@ switch ( msr ) { case MSR_IA32_MCG_CTL: - vmce->mcg_ctl = val; + BUG_ON( cur->arch.mcg_cap & MCG_CTL_P ); + mce_printk(MCE_QUIET, "MCE: no MCG_CTL\n"); + ret = -1; break; case MSR_IA32_MCG_STATUS: vmce->mcg_status = val; @@ -520,52 +516,6 @@ } #endif -int vmce_init(struct cpuinfo_x86 *c) -{ - u64 value; - unsigned int i; - - if ( !h_mci_ctrl ) - { - h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); - if (!h_mci_ctrl) - { - dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); - return -ENOMEM; - } - /* Don''t care banks before firstbank */ - memset(h_mci_ctrl, ~0, - min(firstbank, nr_mce_banks) * sizeof(*h_mci_ctrl)); - for (i = firstbank; i < nr_mce_banks; i++) - rdmsrl(MSR_IA32_MCx_CTL(i), h_mci_ctrl[i]); - } - - rdmsrl(MSR_IA32_MCG_CAP, value); - /* For Guest vMCE usage */ - g_mcg_cap = value & (MCG_CAP_COUNT | MCG_CTL_P | MCG_TES_P | MCG_SER_P); - if (value & MCG_CTL_P) - rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); - - return 0; -} - -static int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) -{ - int bank_nr; - - if ( !bank || !d || !h_mci_ctrl ) - return 1; - - /* Will MCE happen in host if If host mcg_ctl is 0? */ - if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) - return 1; - - bank_nr = bank->mc_bank; - if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) - return 1; - return 0; -} - static int is_hvm_vmce_ready(struct mcinfo_bank *bank, struct domain *d) { struct vcpu *v; @@ -619,14 +569,6 @@ if (no_vmce) return 0; - /* Guest has different MCE ctl value setting */ - if (mca_ctl_conflict(bank, d)) - { - dprintk(XENLOG_WARNING, - "No vmce, guest has different mca control setting\n"); - return 0; - } - return 1; } diff -r 4f92bdf3370c xen/include/asm-x86/mce.h --- a/xen/include/asm-x86/mce.h Wed Jun 27 09:36:43 2012 +0200 +++ b/xen/include/asm-x86/mce.h Fri Jul 06 09:34:31 2012 +0800 @@ -16,9 +16,7 @@ struct domain_mca_msrs { /* Guest should not change below values after DOM boot up */ - uint64_t mcg_ctl; uint64_t mcg_status; - uint64_t *mci_ctl; uint16_t nr_injection; struct list_head impact_header; spinlock_t lock; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2012-Jul-06 08:01 UTC
Re: [RFC PATCH] Xen/MCE: adjust for future new vMCE model
>>> On 05.07.12 at 20:33, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Considering our discussion is some confused, I sent this patch as RFC. > Comments/suggestions could based on this patch.It would be a lot easier if this was an incremental one on top of the uncontroversial one. But anyway:> ====================> Xen/MCE: adjust for future new vMCE model > > Recently we designed a new vMCE model, and would implement later. > > In order not to block live migration from current vMCE to future new vMCE, > we do some middle work in this patch, basically it does minimal adjustment, > including > 1. remove MCG_CTL; > 2. stick all 1''s to MCi_CTL; > 3. set MCG_CTL_P=0 and 2 banks at MCG_CAP; > > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > > diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/mce.c > --- a/xen/arch/x86/cpu/mcheck/mce.c Wed Jun 27 09:36:43 2012 +0200 > +++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Jul 06 09:34:31 2012 +0800 > @@ -843,8 +843,6 @@ > > mctelem_init(sizeof(struct mc_info)); > > - vmce_init(c); > - > /* Turn on MCE now */ > set_in_cr4(X86_CR4_MCE); > > diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/mce.h > --- a/xen/arch/x86/cpu/mcheck/mce.h Wed Jun 27 09:36:43 2012 +0200 > +++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Jul 06 09:34:31 2012 +0800 > @@ -170,8 +170,6 @@ > int inject_vmce(struct domain *d); > int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct > mcinfo_global *global); > > -extern int vmce_init(struct cpuinfo_x86 *c); > - > static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) > { > if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && > diff -r 4f92bdf3370c xen/arch/x86/cpu/mcheck/vmce.c > --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Jun 27 09:36:43 2012 +0200 > +++ b/xen/arch/x86/cpu/mcheck/vmce.c Fri Jul 06 09:34:31 2012 +0800 > @@ -19,36 +19,33 @@ > #include "mce.h" > #include "x86_mca.h" > > +/* > + * Emulalte 2 banks for guest > + * Bank0: reserved for ''bank0 quirk'' occur at some very old processors: > + * 1). Intel cpu whose family-model value < 06-1A; > + * 2). AMD K7 > + * Bank1: used to transfer error info to guest > + */ > +#define GUEST_BANK_NUM 2 > + > #define dom_vmce(x) ((x)->arch.vmca_msrs) > > static uint64_t __read_mostly g_mcg_cap; > > -/* Real value in physical CTL MSR */ > -static uint64_t __read_mostly h_mcg_ctl; > -static uint64_t *__read_mostly h_mci_ctrl; > - > int vmce_init_msr(struct domain *d) > { > dom_vmce(d) = xmalloc(struct domain_mca_msrs); > if ( !dom_vmce(d) ) > return -ENOMEM; > > - dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks); > - if ( !dom_vmce(d)->mci_ctl ) > - { > - xfree(dom_vmce(d)); > - return -ENOMEM; > - } > - memset(dom_vmce(d)->mci_ctl, ~0, > - nr_mce_banks * sizeof(*dom_vmce(d)->mci_ctl)); > - > dom_vmce(d)->mcg_status = 0x0; > - dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0; > dom_vmce(d)->nr_injection = 0; > > INIT_LIST_HEAD(&dom_vmce(d)->impact_header); > spin_lock_init(&dom_vmce(d)->lock); > > + g_mcg_cap = MCG_TES_P | MCG_SER_P | GUEST_BANK_NUM;Is there really any point in retaining g_mcg_cap?> + > return 0; > } > > @@ -56,7 +53,6 @@ > { > if ( !dom_vmce(d) ) > return; > - xfree(dom_vmce(d)->mci_ctl); > xfree(dom_vmce(d)); > dom_vmce(d) = NULL; > } > @@ -68,7 +64,7 @@ > > int vmce_restore_vcpu(struct vcpu *v, uint64_t caps) > { > - if ( caps & ~g_mcg_cap & ~MCG_CAP_COUNT & ~MCG_CTL_P ) > + if ( caps & ~g_mcg_cap & ~MCG_CAP_COUNT )As said before, we should permit restoring with MCG_CTL_P set...> { > dprintk(XENLOG_G_ERR, "%s restore: unsupported MCA capabilities" > " %#" PRIx64 " for d%d:v%u (supported: %#Lx)\n", > @@ -93,9 +89,7 @@ > switch ( msr & (MSR_IA32_MC0_CTL | 3) ) > { > case MSR_IA32_MC0_CTL: > - if ( bank < nr_mce_banks ) > - *val = vmce->mci_ctl[bank] & > - (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); > + *val = ~0UL; > mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", > bank, *val); > break; > @@ -187,11 +181,9 @@ > *val); > break; > case MSR_IA32_MCG_CTL: > - /* Always 0 if no CTL support */ > - if ( cur->arch.mcg_cap & MCG_CTL_P ) > - *val = vmce->mcg_ctl & h_mcg_ctl; > - mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", > - *val); > + BUG_ON( cur->arch.mcg_cap & MCG_CTL_P );... and not BUG_ON() here (instead, once again simply return all ones, which you say shouldn''t surprise guests, as it ought to mean the eventual bits that were tried to get cleared are simply unimplemented. If MCG_CTL_P is clear, behavior would probably best be left as it is currently (returning all zeros), but forcing #GP (i.e. returning -1 from here) would imo also be a valid alternative. Jan> + mce_printk(MCE_QUIET, "MCE: no MCG_CTL\n"); > + ret = -1; > break; > default: > ret = mce_bank_msr(cur, msr) ? bank_mce_rdmsr(cur, msr, val) : 0; > @@ -220,8 +212,10 @@ > switch ( msr & (MSR_IA32_MC0_CTL | 3) ) > { > case MSR_IA32_MC0_CTL: > - if ( bank < nr_mce_banks ) > - vmce->mci_ctl[bank] = val; > + /* > + * if guest crazy clear any bit of MCi_CTL, > + * treat it as not implement and ignore write change it. > + */ > break; > case MSR_IA32_MC0_STATUS: > if ( entry && (entry->bank == bank) ) > @@ -305,7 +299,9 @@ > switch ( msr ) > { > case MSR_IA32_MCG_CTL: > - vmce->mcg_ctl = val; > + BUG_ON( cur->arch.mcg_cap & MCG_CTL_P ); > + mce_printk(MCE_QUIET, "MCE: no MCG_CTL\n"); > + ret = -1; > break; > case MSR_IA32_MCG_STATUS: > vmce->mcg_status = val; > @@ -520,52 +516,6 @@ > } > #endif > > -int vmce_init(struct cpuinfo_x86 *c) > -{ > - u64 value; > - unsigned int i; > - > - if ( !h_mci_ctrl ) > - { > - h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); > - if (!h_mci_ctrl) > - { > - dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); > - return -ENOMEM; > - } > - /* Don''t care banks before firstbank */ > - memset(h_mci_ctrl, ~0, > - min(firstbank, nr_mce_banks) * sizeof(*h_mci_ctrl)); > - for (i = firstbank; i < nr_mce_banks; i++) > - rdmsrl(MSR_IA32_MCx_CTL(i), h_mci_ctrl[i]); > - } > - > - rdmsrl(MSR_IA32_MCG_CAP, value); > - /* For Guest vMCE usage */ > - g_mcg_cap = value & (MCG_CAP_COUNT | MCG_CTL_P | MCG_TES_P | MCG_SER_P); > - if (value & MCG_CTL_P) > - rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); > - > - return 0; > -} > - > -static int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) > -{ > - int bank_nr; > - > - if ( !bank || !d || !h_mci_ctrl ) > - return 1; > - > - /* Will MCE happen in host if If host mcg_ctl is 0? */ > - if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) > - return 1; > - > - bank_nr = bank->mc_bank; > - if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > - return 1; > - return 0; > -} > - > static int is_hvm_vmce_ready(struct mcinfo_bank *bank, struct domain *d) > { > struct vcpu *v; > @@ -619,14 +569,6 @@ > if (no_vmce) > return 0; > > - /* Guest has different MCE ctl value setting */ > - if (mca_ctl_conflict(bank, d)) > - { > - dprintk(XENLOG_WARNING, > - "No vmce, guest has different mca control setting\n"); > - return 0; > - } > - > return 1; > } > > diff -r 4f92bdf3370c xen/include/asm-x86/mce.h > --- a/xen/include/asm-x86/mce.h Wed Jun 27 09:36:43 2012 +0200 > +++ b/xen/include/asm-x86/mce.h Fri Jul 06 09:34:31 2012 +0800 > @@ -16,9 +16,7 @@ > struct domain_mca_msrs > { > /* Guest should not change below values after DOM boot up */ > - uint64_t mcg_ctl; > uint64_t mcg_status; > - uint64_t *mci_ctl; > uint16_t nr_injection; > struct list_head impact_header; > spinlock_t lock;