Jiang, Yunhong
2010-Jan-28 05:55 UTC
[Xen-devel] [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
Not GP fault when guest write non 0s or 1s to MCA CTL MSRs. a) For Mci_CTL MSR, Guest can write any value to it. When read back, it will be ANDed with the physical value. Some bit in physical value can be 0, either because read-only in hardware (like masked by AMD''s Mci_CTL_MASK), or because Xen didn''t enable it. If guest write some bit as 0, while that bit is 1 in host, we will not inject MCE corresponding that bank to guest, as we can''t distinguish if the MCE is caused by the guest-cleared bit. b) For MCG_CTL MSR, guest can write any value to it. When read back, it will be ANDed with the physical value. If guest does not write all 1s. In mca_ctl_conflict(), we simply not inject any vMCE to guest if some bit is set in physical MSR while is cleared in guest ''s vMCG_CTL MSR. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:52:19 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:57:19 2010 +0800 @@ -30,6 +30,11 @@ unsigned int nr_mce_banks; unsigned int nr_mce_banks; static uint64_t g_mcg_cap; + +/* Real value in physical CTL MSR */ +static uint64_t h_mcg_ctl = 0UL; +static uint64_t *h_mci_ctrl; +int firstbank; static void intpose_init(void); static void mcinfo_clear(struct mc_info *); @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c) break; } + if ( !h_mci_ctrl ) + { + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); + if (!h_mci_ctrl) + { + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); + return; + } + /* Don''t care banks before firstbank */ + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); + for (i = firstbank; i < nr_mce_banks; i++) + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); + } + if (g_mcg_cap & MCG_CTL_P) + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); set_poll_bankmask(c); if (!inited) printk(XENLOG_INFO "CPU%i: No machine check initialization\n", @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va *val); break; case MSR_IA32_MCG_CTL: - *val = d->arch.vmca_msrs.mcg_ctl; + /* Always 0 if no CTL support */ + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", *val); break; @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va switch (msr & (MSR_IA32_MC0_CTL | 3)) { case MSR_IA32_MC0_CTL: - *val = d->arch.vmca_msrs.mci_ctl[bank]; + *val = d->arch.vmca_msrs.mci_ctl[bank] & + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", bank, *val); break; @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val) switch ( msr ) { case MSR_IA32_MCG_CTL: - if ( val && (val + 1) ) - { - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " - "to MCG_CTL should be all 0s or 1s\n", val); - ret = -1; - break; - } d->arch.vmca_msrs.mcg_ctl = val; break; case MSR_IA32_MCG_STATUS: @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val) switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { case MSR_IA32_MC0_CTL: - if ( val && (val + 1) ) - { - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL " - "should be all 0s or 1s (is %"PRIx64")\n", - bank, val); - ret = -1; - break; - } d->arch.vmca_msrs.mci_ctl[bank] = val; break; case MSR_IA32_MC0_STATUS: @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) +{ + int bank_nr; + + if ( !bank || !d || !h_mci_ctrl ) + return 1; + + /* Will MCE happen in host if If host mcg_ctl is 0? */ + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) + return 1; + + bank_nr = bank->mc_bank; + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) + return 1; + return 0; +} + static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) { struct cpuinfo_x86 *c; diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:19 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:20 2010 +0800 @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); u64 mce_cap_init(void); +extern int firstbank; +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d); int intel_mce_rdmsr(uint32_t msr, uint64_t *val); int intel_mce_wrmsr(uint32_t msr, uint64_t val); diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:19 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:20 2010 +0800 @@ -20,7 +20,6 @@ int ser_support = 0; int ser_support = 0; static int nr_intel_ext_msrs = 0; -static int firstbank; /* Below are for MCE handling */ struct mce_softirq_barrier { @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci * the mfn in question) */ BUG_ON( result->owner == DOMID_COW ); if ( result->owner != DOMID_XEN ) { + d = get_domain_by_id(result->owner); + if ( mca_ctl_conflict(bank, d) ) + { + /* Guest has different MCE ctl with hypervisor */ + put_domain(d); + return; + } + gfn mfn_to_gmfn(d, ((bank->mc_addr) >> PAGE_SHIFT)); bank->mc_addr _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Christoph Egger
2010-Jan-28 08:11 UTC
[Xen-devel] Re: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
On Thursday 28 January 2010 06:55:53 Jiang, Yunhong wrote:> Not GP fault when guest write non 0s or 1s to MCA CTL MSRs. > > a) For Mci_CTL MSR, Guest can write any value to it. When read back, it > will be ANDed with the physical value. Some bit in physical value can be 0, > either because read-only in hardware (like masked by AMD''s Mci_CTL_MASK), > or because Xen didn''t enable it. If guest write some bit as 0, while that > bit is 1 in host, we will not inject MCE corresponding that bank to guest, > as we can''t distinguish if the MCE is caused by the guest-cleared bit. > > b) For MCG_CTL MSR, guest can write any value to it. When read back, it > will be ANDed with the physical value. If guest does not write all 1s. In > mca_ctl_conflict(), we simply not inject any vMCE to guest if some bit is > set in physical MSR while is cleared in guest ''s vMCG_CTL MSR. > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > > diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.c > --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:52:19 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:57:19 2010 +0800 > @@ -30,6 +30,11 @@ unsigned int nr_mce_banks; > unsigned int nr_mce_banks; > > static uint64_t g_mcg_cap; > + > +/* Real value in physical CTL MSR */ > +static uint64_t h_mcg_ctl = 0UL; > +static uint64_t *h_mci_ctrl; > +int firstbank;The physical MSRs are per physical cpu-core on AMD CPUs. The data structure does not reflect this. Other than that this patch is fine with me. Christoph> > static void intpose_init(void); > static void mcinfo_clear(struct mc_info *); > @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c) > break; > } > > + if ( !h_mci_ctrl ) > + { > + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); > + if (!h_mci_ctrl) > + { > + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); > + return; > + } > + /* Don''t care banks before firstbank */ > + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); > + for (i = firstbank; i < nr_mce_banks; i++) > + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); > + } > + if (g_mcg_cap & MCG_CTL_P) > + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); > set_poll_bankmask(c); > if (!inited) > printk(XENLOG_INFO "CPU%i: No machine check initialization\n", > @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va > *val); > break; > case MSR_IA32_MCG_CTL: > - *val = d->arch.vmca_msrs.mcg_ctl; > + /* Always 0 if no CTL support */ > + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; > mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", > *val); > break; > @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va > switch (msr & (MSR_IA32_MC0_CTL | 3)) > { > case MSR_IA32_MC0_CTL: > - *val = d->arch.vmca_msrs.mci_ctl[bank]; > + *val = d->arch.vmca_msrs.mci_ctl[bank] & > + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); > mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", > bank, *val); > break; > @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val) > switch ( msr ) > { > case MSR_IA32_MCG_CTL: > - if ( val && (val + 1) ) > - { > - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " > - "to MCG_CTL should be all 0s or 1s\n", val); > - ret = -1; > - break; > - } > d->arch.vmca_msrs.mcg_ctl = val; > break; > case MSR_IA32_MCG_STATUS: > @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val) > switch ( msr & (MSR_IA32_MC0_CTL | 3) ) > { > case MSR_IA32_MC0_CTL: > - if ( val && (val + 1) ) > - { > - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL " > - "should be all 0s or 1s (is %"PRIx64")\n", > - bank, val); > - ret = -1; > - break; > - } > d->arch.vmca_msrs.mci_ctl[bank] = val; > break; > case MSR_IA32_MC0_STATUS: > @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, > (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ > ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ > > +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) > +{ > + int bank_nr; > + > + if ( !bank || !d || !h_mci_ctrl ) > + return 1; > + > + /* Will MCE happen in host if If host mcg_ctl is 0? */ > + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) > + return 1; > + > + bank_nr = bank->mc_bank; > + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > + return 1; > + return 0; > +} > + > static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) > { > struct cpuinfo_x86 *c; > diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.h > --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:19 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:20 2010 +0800 > @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu > void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); > > u64 mce_cap_init(void); > +extern int firstbank; > +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d); > > int intel_mce_rdmsr(uint32_t msr, uint64_t *val); > int intel_mce_wrmsr(uint32_t msr, uint64_t val); > diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce_intel.c > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:19 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:20 2010 +0800 > @@ -20,7 +20,6 @@ int ser_support = 0; > int ser_support = 0; > > static int nr_intel_ext_msrs = 0; > -static int firstbank; > > /* Below are for MCE handling */ > struct mce_softirq_barrier { > @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci > * the mfn in question) */ > BUG_ON( result->owner == DOMID_COW ); > if ( result->owner != DOMID_XEN ) { > + > d = get_domain_by_id(result->owner); > + if ( mca_ctl_conflict(bank, d) ) > + { > + /* Guest has different MCE ctl with > hypervisor */ + put_domain(d); > + return; > + } > + > gfn > mfn_to_gmfn(d, ((bank->mc_addr) >> > PAGE_SHIFT)); bank->mc_addr-- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen Geschaeftsfuehrer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2010-Jan-28 09:48 UTC
[Xen-devel] RE: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
>-----Original Message----- >From: Christoph Egger [mailto:Christoph.Egger@amd.com] >Sent: Thursday, January 28, 2010 4:12 PM >To: Jiang, Yunhong >Cc: Keir Fraser; Frank.Vanderlinden@Sun.COM; Jan Beulich; >xen-devel@lists.xensource.com >Subject: Re: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA >CTL MSRs. > >On Thursday 28 January 2010 06:55:53 Jiang, Yunhong wrote: >> Not GP fault when guest write non 0s or 1s to MCA CTL MSRs. >> >> a) For Mci_CTL MSR, Guest can write any value to it. When read back, it >> will be ANDed with the physical value. Some bit in physical value can be 0, >> either because read-only in hardware (like masked by AMD''s Mci_CTL_MASK), >> or because Xen didn''t enable it. If guest write some bit as 0, while that >> bit is 1 in host, we will not inject MCE corresponding that bank to guest, >> as we can''t distinguish if the MCE is caused by the guest-cleared bit. >> >> b) For MCG_CTL MSR, guest can write any value to it. When read back, it >> will be ANDed with the physical value. If guest does not write all 1s. In >> mca_ctl_conflict(), we simply not inject any vMCE to guest if some bit is >> set in physical MSR while is cleared in guest ''s vMCG_CTL MSR. >> >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> >> >> >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.c >> --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:52:19 2010 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:57:19 2010 +0800 >> @@ -30,6 +30,11 @@ unsigned int nr_mce_banks; >> unsigned int nr_mce_banks; >> >> static uint64_t g_mcg_cap; >> + >> +/* Real value in physical CTL MSR */ >> +static uint64_t h_mcg_ctl = 0UL; >> +static uint64_t *h_mci_ctrl; >> +int firstbank; > >The physical MSRs are per physical cpu-core on AMD CPUs. >The data structure does not reflect this. > >Other than that this patch is fine with me.Christoph, thanks for your review very much. Yes, the MSRs are mostly per cpu-core in Intel side also, although some sharing may happen. On AMD platform, will different CPU has different ctrl value? Or will Xen setup different value to different CPU? I assume they should be same, am I right? Thanks --jyh> >Christoph > > >> >> static void intpose_init(void); >> static void mcinfo_clear(struct mc_info *); >> @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c) >> break; >> } >> >> + if ( !h_mci_ctrl ) >> + { >> + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); >> + if (!h_mci_ctrl) >> + { >> + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); >> + return; >> + } >> + /* Don''t care banks before firstbank */ >> + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); >> + for (i = firstbank; i < nr_mce_banks; i++) >> + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); >> + } >> + if (g_mcg_cap & MCG_CTL_P) >> + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); >> set_poll_bankmask(c); >> if (!inited) >> printk(XENLOG_INFO "CPU%i: No machine check initialization\n", >> @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va >> *val); >> break; >> case MSR_IA32_MCG_CTL: >> - *val = d->arch.vmca_msrs.mcg_ctl; >> + /* Always 0 if no CTL support */ >> + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", >> *val); >> break; >> @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va >> switch (msr & (MSR_IA32_MC0_CTL | 3)) >> { >> case MSR_IA32_MC0_CTL: >> - *val = d->arch.vmca_msrs.mci_ctl[bank]; >> + *val = d->arch.vmca_msrs.mci_ctl[bank] & >> + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL >0x%"PRIx64"\n", >> bank, *val); >> break; >> @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val) >> switch ( msr ) >> { >> case MSR_IA32_MCG_CTL: >> - if ( val && (val + 1) ) >> - { >> - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " >> - "to MCG_CTL should be all 0s or 1s\n", val); >> - ret = -1; >> - break; >> - } >> d->arch.vmca_msrs.mcg_ctl = val; >> break; >> case MSR_IA32_MCG_STATUS: >> @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val) >> switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >> { >> case MSR_IA32_MC0_CTL: >> - if ( val && (val + 1) ) >> - { >> - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL " >> - "should be all 0s or 1s (is %"PRIx64")\n", >> - bank, val); >> - ret = -1; >> - break; >> - } >> d->arch.vmca_msrs.mci_ctl[bank] = val; >> break; >> case MSR_IA32_MC0_STATUS: >> @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, >> (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ >> ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ >> >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) >> +{ >> + int bank_nr; >> + >> + if ( !bank || !d || !h_mci_ctrl ) >> + return 1; >> + >> + /* Will MCE happen in host if If host mcg_ctl is 0? */ >> + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) >> + return 1; >> + >> + bank_nr = bank->mc_bank; >> + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) >> + return 1; >> + return 0; >> +} >> + >> static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) >> { >> struct cpuinfo_x86 *c; >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.h >> --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:19 2010 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:20 2010 +0800 >> @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu >> void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); >> >> u64 mce_cap_init(void); >> +extern int firstbank; >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d); >> >> int intel_mce_rdmsr(uint32_t msr, uint64_t *val); >> int intel_mce_wrmsr(uint32_t msr, uint64_t val); >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce_intel.c >> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:19 2010 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:20 2010 +0800 >> @@ -20,7 +20,6 @@ int ser_support = 0; >> int ser_support = 0; >> >> static int nr_intel_ext_msrs = 0; >> -static int firstbank; >> >> /* Below are for MCE handling */ >> struct mce_softirq_barrier { >> @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci >> * the mfn in question) */ >> BUG_ON( result->owner == DOMID_COW ); >> if ( result->owner != DOMID_XEN ) { >> + >> d = get_domain_by_id(result->owner); >> + if ( mca_ctl_conflict(bank, d) ) >> + { >> + /* Guest has different MCE ctl with >> hypervisor */ + put_domain(d); >> + return; >> + } >> + >> gfn >> mfn_to_gmfn(d, ((bank->mc_addr) >> >> PAGE_SHIFT)); bank->mc_addr > > > >-- >---to satisfy European Law for business letters: >Advanced Micro Devices GmbH >Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen >Geschaeftsfuehrer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni >Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen >Registergericht Muenchen, HRB Nr. 43632_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Christoph Egger
2010-Jan-28 16:15 UTC
[Xen-devel] Re: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
On Thursday 28 January 2010 10:48:40 Jiang, Yunhong wrote:> >-----Original Message----- > >From: Christoph Egger [mailto:Christoph.Egger@amd.com] > >Sent: Thursday, January 28, 2010 4:12 PM > >To: Jiang, Yunhong > >Cc: Keir Fraser; Frank.Vanderlinden@Sun.COM; Jan Beulich; > >xen-devel@lists.xensource.com > >Subject: Re: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s > > to MCA CTL MSRs. > > > >On Thursday 28 January 2010 06:55:53 Jiang, Yunhong wrote: > >> Not GP fault when guest write non 0s or 1s to MCA CTL MSRs. > >> > >> a) For Mci_CTL MSR, Guest can write any value to it. When read back, it > >> will be ANDed with the physical value. Some bit in physical value can be > >> 0, either because read-only in hardware (like masked by AMD''s > >> Mci_CTL_MASK), or because Xen didn''t enable it. If guest write some bit > >> as 0, while that bit is 1 in host, we will not inject MCE corresponding > >> that bank to guest, as we can''t distinguish if the MCE is caused by the > >> guest-cleared bit. > >> > >> b) For MCG_CTL MSR, guest can write any value to it. When read back, it > >> will be ANDed with the physical value. If guest does not write all 1s. > >> In mca_ctl_conflict(), we simply not inject any vMCE to guest if some > >> bit is set in physical MSR while is cleared in guest ''s vMCG_CTL MSR. > >> > >> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > >> > >> > >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.c > >> --- a/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:52:19 2010 +0800 > >> +++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Jan 26 00:57:19 2010 +0800 > >> @@ -30,6 +30,11 @@ unsigned int nr_mce_banks; > >> unsigned int nr_mce_banks; > >> > >> static uint64_t g_mcg_cap; > >> + > >> +/* Real value in physical CTL MSR */ > >> +static uint64_t h_mcg_ctl = 0UL; > >> +static uint64_t *h_mci_ctrl; > >> +int firstbank; > > > >The physical MSRs are per physical cpu-core on AMD CPUs. > >The data structure does not reflect this. > > > >Other than that this patch is fine with me. > > Christoph, thanks for your review very much. > > Yes, the MSRs are mostly per cpu-core in Intel side also, although some > sharing may happen. On AMD platform, will different CPU has different ctrl > value?This is possible. If this actually happens or not depends on what Xen is doing. Christoph> Or will Xen setup different value to different CPU? I assume they > should be same, am I right? > > Thanks > --jyh > > >Christoph > > > >> static void intpose_init(void); > >> static void mcinfo_clear(struct mc_info *); > >> @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c) > >> break; > >> } > >> > >> + if ( !h_mci_ctrl ) > >> + { > >> + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); > >> + if (!h_mci_ctrl) > >> + { > >> + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); > >> + return; > >> + } > >> + /* Don''t care banks before firstbank */ > >> + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); > >> + for (i = firstbank; i < nr_mce_banks; i++) > >> + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); > >> + } > >> + if (g_mcg_cap & MCG_CTL_P) > >> + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); > >> set_poll_bankmask(c); > >> if (!inited) > >> printk(XENLOG_INFO "CPU%i: No machine check initialization\n", > >> @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va > >> *val); > >> break; > >> case MSR_IA32_MCG_CTL: > >> - *val = d->arch.vmca_msrs.mcg_ctl; > >> + /* Always 0 if no CTL support */ > >> + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; > >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", > >> *val); > >> break; > >> @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va > >> switch (msr & (MSR_IA32_MC0_CTL | 3)) > >> { > >> case MSR_IA32_MC0_CTL: > >> - *val = d->arch.vmca_msrs.mci_ctl[bank]; > >> + *val = d->arch.vmca_msrs.mci_ctl[bank] & > >> + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); > >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL > > > >0x%"PRIx64"\n", > > > >> bank, *val); > >> break; > >> @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val) > >> switch ( msr ) > >> { > >> case MSR_IA32_MCG_CTL: > >> - if ( val && (val + 1) ) > >> - { > >> - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " > >> - "to MCG_CTL should be all 0s or 1s\n", val); > >> - ret = -1; > >> - break; > >> - } > >> d->arch.vmca_msrs.mcg_ctl = val; > >> break; > >> case MSR_IA32_MCG_STATUS: > >> @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val) > >> switch ( msr & (MSR_IA32_MC0_CTL | 3) ) > >> { > >> case MSR_IA32_MC0_CTL: > >> - if ( val && (val + 1) ) > >> - { > >> - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL " > >> - "should be all 0s or 1s (is %"PRIx64")\n", > >> - bank, val); > >> - ret = -1; > >> - break; > >> - } > >> d->arch.vmca_msrs.mci_ctl[bank] = val; > >> break; > >> case MSR_IA32_MC0_STATUS: > >> @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, > >> (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ > >> ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ > >> > >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) > >> +{ > >> + int bank_nr; > >> + > >> + if ( !bank || !d || !h_mci_ctrl ) > >> + return 1; > >> + > >> + /* Will MCE happen in host if If host mcg_ctl is 0? */ > >> + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) > >> + return 1; > >> + > >> + bank_nr = bank->mc_bank; > >> + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > >> + return 1; > >> + return 0; > >> +} > >> + > >> static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) > >> { > >> struct cpuinfo_x86 *c; > >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.h > >> --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:19 2010 +0800 > >> +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:20 2010 +0800 > >> @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu > >> void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); > >> > >> u64 mce_cap_init(void); > >> +extern int firstbank; > >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d); > >> > >> int intel_mce_rdmsr(uint32_t msr, uint64_t *val); > >> int intel_mce_wrmsr(uint32_t msr, uint64_t val); > >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce_intel.c > >> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:19 2010 +0800 > >> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:20 2010 +0800 > >> @@ -20,7 +20,6 @@ int ser_support = 0; > >> int ser_support = 0; > >> > >> static int nr_intel_ext_msrs = 0; > >> -static int firstbank; > >> > >> /* Below are for MCE handling */ > >> struct mce_softirq_barrier { > >> @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci > >> * the mfn in question) */ > >> BUG_ON( result->owner == DOMID_COW ); > >> if ( result->owner != DOMID_XEN ) { > >> + > >> d = get_domain_by_id(result->owner); > >> + if ( mca_ctl_conflict(bank, d) ) > >> + { > >> + /* Guest has different MCE ctl with > >> hypervisor */ + put_domain(d); > >> + return; > >> + } > >> + > >> gfn > >> mfn_to_gmfn(d, ((bank->mc_addr) >> > >> PAGE_SHIFT)); bank->mc_addr-- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen Geschaeftsfuehrer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2010-Jan-29 02:44 UTC
[Xen-devel] RE: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
>> >The physical MSRs are per physical cpu-core on AMD CPUs. >> >The data structure does not reflect this. >> > >> >Other than that this patch is fine with me. >> >> Christoph, thanks for your review very much. >> >> Yes, the MSRs are mostly per cpu-core in Intel side also, although some >> sharing may happen. On AMD platform, will different CPU has different ctrl >> value? > >This is possible. If this actually happens or not depends on what Xen is >doing. > >ChristophIf this really happen, that means the physical CPU has different MCE setting. In that situation, I think we should not try to inject any vMCE to guest, because the vCPU can combine to any physical CPU. But, still I suspect if under any situation will that happen. --jyh> >> Or will Xen setup different value to different CPU? I assume they >> should be same, am I right? >> >> Thanks >> --jyh >> >> >Christoph >> > >> >> static void intpose_init(void); >> >> static void mcinfo_clear(struct mc_info *); >> >> @@ -642,6 +647,21 @@ void mcheck_init(struct cpuinfo_x86 *c) >> >> break; >> >> } >> >> >> >> + if ( !h_mci_ctrl ) >> >> + { >> >> + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); >> >> + if (!h_mci_ctrl) >> >> + { >> >> + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); >> >> + return; >> >> + } >> >> + /* Don''t care banks before firstbank */ >> >> + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); >> >> + for (i = firstbank; i < nr_mce_banks; i++) >> >> + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); >> >> + } >> >> + if (g_mcg_cap & MCG_CTL_P) >> >> + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); >> >> set_poll_bankmask(c); >> >> if (!inited) >> >> printk(XENLOG_INFO "CPU%i: No machine check initialization\n", >> >> @@ -708,7 +728,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va >> >> *val); >> >> break; >> >> case MSR_IA32_MCG_CTL: >> >> - *val = d->arch.vmca_msrs.mcg_ctl; >> >> + /* Always 0 if no CTL support */ >> >> + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; >> >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL >0x%"PRIx64"\n", >> >> *val); >> >> break; >> >> @@ -723,7 +744,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va >> >> switch (msr & (MSR_IA32_MC0_CTL | 3)) >> >> { >> >> case MSR_IA32_MC0_CTL: >> >> - *val = d->arch.vmca_msrs.mci_ctl[bank]; >> >> + *val = d->arch.vmca_msrs.mci_ctl[bank] & >> >> + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); >> >> mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL >> > >> >0x%"PRIx64"\n", >> > >> >> bank, *val); >> >> break; >> >> @@ -805,13 +827,6 @@ int mce_wrmsr(u32 msr, u64 val) >> >> switch ( msr ) >> >> { >> >> case MSR_IA32_MCG_CTL: >> >> - if ( val && (val + 1) ) >> >> - { >> >> - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " >> >> - "to MCG_CTL should be all 0s or 1s\n", val); >> >> - ret = -1; >> >> - break; >> >> - } >> >> d->arch.vmca_msrs.mcg_ctl = val; >> >> break; >> >> case MSR_IA32_MCG_STATUS: >> >> @@ -855,14 +870,6 @@ int mce_wrmsr(u32 msr, u64 val) >> >> switch ( msr & (MSR_IA32_MC0_CTL | 3) ) >> >> { >> >> case MSR_IA32_MC0_CTL: >> >> - if ( val && (val + 1) ) >> >> - { >> >> - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL >" >> >> - "should be all 0s or 1s (is %"PRIx64")\n", >> >> - bank, val); >> >> - ret = -1; >> >> - break; >> >> - } >> >> d->arch.vmca_msrs.mci_ctl[bank] = val; >> >> break; >> >> case MSR_IA32_MC0_STATUS: >> >> @@ -1162,6 +1169,23 @@ void intpose_inval(unsigned int cpu_nr, >> >> (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ >> >> ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ >> >> >> >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) >> >> +{ >> >> + int bank_nr; >> >> + >> >> + if ( !bank || !d || !h_mci_ctrl ) >> >> + return 1; >> >> + >> >> + /* Will MCE happen in host if If host mcg_ctl is 0? */ >> >> + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) >> >> + return 1; >> >> + >> >> + bank_nr = bank->mc_bank; >> >> + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) >> >> + return 1; >> >> + return 0; >> >> +} >> >> + >> >> static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) >> >> { >> >> struct cpuinfo_x86 *c; >> >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce.h >> >> --- a/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:19 2010 +0800 >> >> +++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Jan 26 00:52:20 2010 +0800 >> >> @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu >> >> void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); >> >> >> >> u64 mce_cap_init(void); >> >> +extern int firstbank; >> >> +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d); >> >> >> >> int intel_mce_rdmsr(uint32_t msr, uint64_t *val); >> >> int intel_mce_wrmsr(uint32_t msr, uint64_t val); >> >> diff -r b2a7600b71cc xen/arch/x86/cpu/mcheck/mce_intel.c >> >> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:19 2010 +0800 >> >> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Tue Jan 26 00:52:20 2010 >+0800 >> >> @@ -20,7 +20,6 @@ int ser_support = 0; >> >> int ser_support = 0; >> >> >> >> static int nr_intel_ext_msrs = 0; >> >> -static int firstbank; >> >> >> >> /* Below are for MCE handling */ >> >> struct mce_softirq_barrier { >> >> @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci >> >> * the mfn in question) */ >> >> BUG_ON( result->owner == DOMID_COW ); >> >> if ( result->owner != DOMID_XEN ) { >> >> + >> >> d = get_domain_by_id(result->owner); >> >> + if ( mca_ctl_conflict(bank, d) ) >> >> + { >> >> + /* Guest has different MCE ctl with >> >> hypervisor */ + put_domain(d); >> >> + return; >> >> + } >> >> + >> >> gfn >> >> mfn_to_gmfn(d, ((bank->mc_addr) >> >> >> PAGE_SHIFT)); bank->mc_addr > > > > >-- >---to satisfy European Law for business letters: >Advanced Micro Devices GmbH >Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen >Geschaeftsfuehrer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni >Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen >Registergericht Muenchen, HRB Nr. 43632_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2010-Jan-29 08:17 UTC
[Xen-devel] RE: [PATCH 2/6] MCE: Not GP fault when guest write non 0s or 1s to MCA CTL MSRs.
Christoph ,here is the updated patch, as stated in previous mail, if there are different Mci_CTL in different CPU, vMCE will be disabled Any comments? --jyh Not GP fault when guest write non 0s or 1s to MCA CTL MSRs. a) For Mci_CTL MSR, Guest can write any value to it. When read back, it will be ANDed with the physical value. Some bit in physical value can be 0, either because read-only in hardware (like masked by AMD''s Mci_CTL_MASK), or because Xen didn''t enable it. If guest write some bit as 0, while that bit is 1 in host, we will not inject MCE corresponding that bank to guest, as we can''t distinguish if the MCE is caused by the guest-cleared bit. b) For MCG_CTL MSR, guest can write any value to it. When read back, it will be ANDed with the physical value. If guest does not write all 1s. In mca_ctl_conflict(), we simply not inject any vMCE to guest if some bit is set in physical MSR while is cleared in guest ''s vMCG_CTL MSR. Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r c814db5ffd8a xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Thu Jan 28 18:57:01 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.c Thu Jan 28 23:54:35 2010 +0800 @@ -30,6 +30,13 @@ unsigned int nr_mce_banks; unsigned int nr_mce_banks; static uint64_t g_mcg_cap; +/* We will try to inject vMCE to corresponding guest */ +int vmce_enable = 1; + +/* Real value in physical CTL MSR */ +static uint64_t h_mcg_ctl = 0UL; +static uint64_t *h_mci_ctrl; +int firstbank; static void intpose_init(void); static void mcinfo_clear(struct mc_info *); @@ -642,6 +649,34 @@ void mcheck_init(struct cpuinfo_x86 *c) break; } + if ( !h_mci_ctrl ) + { + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); + if (!h_mci_ctrl) + { + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); + return; + } + /* Don''t care banks before firstbank */ + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); + for (i = firstbank; i < nr_mce_banks; i++) + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); + } else { + uint64_t ctrl; + + for ( i = firstbank; i < nr_mce_banks; i++ ) + { + rdmsrl(MSR_IA32_MC0_CTL + 4*i, ctrl); + if ( ctrl != h_mci_ctrl[i] ) + { + dprintk(XENLOG_WARNING, + "Different MCi ctl in pCPUs, no vMCE to guest\n"); + vmce_enable = 0; + } + } + } + if (g_mcg_cap & MCG_CTL_P) + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); set_poll_bankmask(c); if (!inited) printk(XENLOG_INFO "CPU%i: No machine check initialization\n", @@ -708,7 +743,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va *val); break; case MSR_IA32_MCG_CTL: - *val = d->arch.vmca_msrs.mcg_ctl; + /* Always 0 if no CTL support */ + *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl; mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n", *val); break; @@ -723,7 +759,8 @@ int mce_rdmsr(uint32_t msr, uint64_t *va switch (msr & (MSR_IA32_MC0_CTL | 3)) { case MSR_IA32_MC0_CTL: - *val = d->arch.vmca_msrs.mci_ctl[bank]; + *val = d->arch.vmca_msrs.mci_ctl[bank] & + (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL); mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n", bank, *val); break; @@ -805,13 +842,6 @@ int mce_wrmsr(u32 msr, u64 val) switch ( msr ) { case MSR_IA32_MCG_CTL: - if ( val && (val + 1) ) - { - mce_printk(MCE_QUIET, "MCE: val \"%"PRIx64"\" written " - "to MCG_CTL should be all 0s or 1s\n", val); - ret = -1; - break; - } d->arch.vmca_msrs.mcg_ctl = val; break; case MSR_IA32_MCG_STATUS: @@ -855,14 +885,6 @@ int mce_wrmsr(u32 msr, u64 val) switch ( msr & (MSR_IA32_MC0_CTL | 3) ) { case MSR_IA32_MC0_CTL: - if ( val && (val + 1) ) - { - mce_printk(MCE_QUIET, "MCE: val written to MC%u_CTL " - "should be all 0s or 1s (is %"PRIx64")\n", - bank, val); - ret = -1; - break; - } d->arch.vmca_msrs.mci_ctl[bank] = val; break; case MSR_IA32_MC0_STATUS: @@ -1162,6 +1184,26 @@ void intpose_inval(unsigned int cpu_nr, (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ +int mce_inject_vmce(struct mcinfo_bank *bank, struct domain *d) +{ + int bank_nr; + + if ( !vmce_enable ) + return 0; + + if ( !bank || !d || !h_mci_ctrl ) + return 0; + + /* Will MCE happen in host if If host mcg_ctl is 0? */ + if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl ) + return 0; + + bank_nr = bank->mc_bank; + if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) + return 0; + return 1; +} + static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) { struct cpuinfo_x86 *c; diff -r c814db5ffd8a xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Thu Jan 28 18:57:01 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.h Thu Jan 28 23:55:01 2010 +0800 @@ -39,6 +39,8 @@ void amd_nonfatal_mcheck_init(struct cpu void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); u64 mce_cap_init(void); +extern int firstbank; +int mce_inject_vmce(struct mcinfo_bank *bank, struct domain *d); int intel_mce_rdmsr(uint32_t msr, uint64_t *val); int intel_mce_wrmsr(uint32_t msr, uint64_t val); diff -r c814db5ffd8a xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jan 28 18:57:01 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jan 28 23:55:18 2010 +0800 @@ -20,7 +20,6 @@ int ser_support = 0; int ser_support = 0; static int nr_intel_ext_msrs = 0; -static int firstbank; /* Below are for MCE handling */ struct mce_softirq_barrier { @@ -361,7 +360,15 @@ static void intel_UCR_handler(struct mci * the mfn in question) */ BUG_ON( result->owner == DOMID_COW ); if ( result->owner != DOMID_XEN ) { + d = get_domain_by_id(result->owner); + if ( !mce_inject_vmce(bank, d) ) + { + /* Guest has different MCE ctl with hypervisor */ + put_domain(d); + return; + } + gfn mfn_to_gmfn(d, ((bank->mc_addr) >> PAGE_SHIFT)); bank->mc_addr>If this really happen, that means the physical CPU has different MCE setting. In that >situation, I think we should not try to inject any vMCE to guest, because the vCPU >can combine to any physical CPU. >But, still I suspect if under any situation will that happen. > >--jyh_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel