This patch cleans up mcheck_init. Firstly and most importantly, the maximum number of MCA banks is hard-coded as MAX_NR_BANKS, which is 30. This is not architecturally correct. This patch removes this definition, replacing cpu_banks_t with struct mca_banks, and provides some basic functions (set/clear/test/alloc/free) for struct mca_banks. Secondly, move the broadcast_check code into the Intel-specific code, since only Intel platforms support broadcast now. Thirdly, the X86_FEATURE_MCA check and the CR4_MCE enable are done in every vendor-specific callback; that's redundant, so move them to mcheck_init. Also, we should enable CR4_MCE only at the end of mcheck_init, to close the small window between enabling CR4_MCE and the MCA setup. We also move the vMCE-specific code to vmce.c as vmce_init, to make the code cleaner. arch/x86/cpu/mcheck/amd_k8.c | 7 arch/x86/cpu/mcheck/k7.c | 12 - arch/x86/cpu/mcheck/mce.c | 293 ++++++++++++++++++++-------------------- arch/x86/cpu/mcheck/mce.h | 20 +- arch/x86/cpu/mcheck/mce_intel.c | 126 +++++++++++------ arch/x86/cpu/mcheck/non-fatal.c | 5 arch/x86/cpu/mcheck/vmce.c | 53 +++++++ arch/x86/cpu/mcheck/x86_mca.h | 33 ++++ include/asm-x86/mce.h | 2 9 files changed, 333 insertions(+), 218 deletions(-) Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> diff -r 45321a57873a xen/arch/x86/cpu/mcheck/amd_k8.c --- a/xen/arch/x86/cpu/mcheck/amd_k8.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Mon Jun 07 16:41:43 2010 +0800 @@ -81,13 +81,8 @@ enum mcheck_type amd_k8_mcheck_init(stru uint32_t i; enum mcequirk_amd_flags quirkflag; - /* Check for PPro style MCA; our caller has confirmed MCE support. 
*/ - if (!cpu_has(c, X86_FEATURE_MCA)) - return mcheck_none; - quirkflag = mcequirk_lookup_amd_quirkdata(c); - mce_cap_init(); x86_mce_vector_register(k8_machine_check); for (i = 0; i < nr_mce_banks; i++) { @@ -101,7 +96,5 @@ enum mcheck_type amd_k8_mcheck_init(stru } } - set_in_cr4(X86_CR4_MCE); - return mcheck_amd_k8; } diff -r 45321a57873a xen/arch/x86/cpu/mcheck/k7.c --- a/xen/arch/x86/cpu/mcheck/k7.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/k7.c Mon Jun 07 16:41:43 2010 +0800 @@ -70,19 +70,9 @@ static fastcall void k7_machine_check(st /* AMD K7 machine check */ enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c) { - u32 l, h; int i; - /* Check for PPro style MCA; our caller has confirmed MCE support. */ - if (!cpu_has(c, X86_FEATURE_MCA)) - return mcheck_none; - x86_mce_vector_register(k7_machine_check); - - rdmsr (MSR_IA32_MCG_CAP, l, h); - if (l & (1<<8)) /* Control register present ? */ - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); - nr_mce_banks = l & 0xff; /* Clear status for MC index 0 separately, we don''t touch CTL, * as some Athlons cause spurious MCEs when its enabled. 
*/ @@ -92,7 +82,5 @@ enum mcheck_type amd_k7_mcheck_init(stru wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); } - set_in_cr4 (X86_CR4_MCE); - return mcheck_amd_k7; } diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.c --- a/xen/arch/x86/cpu/mcheck/mce.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Jun 07 16:43:46 2010 +0800 @@ -25,21 +25,15 @@ int mce_disabled; invbool_param("mce", mce_disabled); -static int mce_force_broadcast; -boolean_param("mce_fb", mce_force_broadcast); int is_mc_panic; unsigned int nr_mce_banks; int mce_broadcast = 0; -uint64_t g_mcg_cap; - -/* Real value in physical CTL MSR */ -uint64_t h_mcg_ctl = 0UL; -uint64_t *h_mci_ctrl; int firstbank; static void intpose_init(void); static void mcinfo_clear(struct mc_info *); +struct mca_banks *mca_allbanks; #define SEG_PL(segsel) ((segsel) & 0x3) #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16) @@ -54,8 +48,6 @@ static int x86_mcerr(const char *msg, in #else #define x86_mcerr(msg, err) (err) #endif - -cpu_banks_t mca_allbanks; int mce_verbosity; static void __init mce_set_verbosity(char *str) @@ -113,6 +105,36 @@ void mce_recoverable_register(mce_recove mc_recoverable_scan = cbfunc; } +struct mca_banks *mcabanks_alloc(void) +{ + struct mca_banks *mb; + + mb = xmalloc(struct mca_banks); + if (!mb) + return NULL; + + mb->bank_map = xmalloc_array(unsigned long, + BITS_TO_LONGS(nr_mce_banks)); + if (!mb->bank_map) + { + xfree(mb); + return NULL; + } + + mb->num = nr_mce_banks; + memset(mb->bank_map, 0, sizeof(long) * BITS_TO_LONGS(nr_mce_banks)); + + return mb; +} + +void mcabanks_free(struct mca_banks *banks) +{ + if (banks == NULL) + return; + if (banks->bank_map) + xfree(banks->bank_map); + xfree(banks); +} /* Judging whether to Clear Machine Check error bank callback handler * According to Intel latest MCA OS Recovery Writer''s Guide, * whether the error MCA bank needs to be cleared is decided by the mca_source @@ -218,8 +240,8 @@ static int mca_init_global(uint32_t flag * For Intel 
latest CPU, whether to clear the error bank status needs to * be judged by the callback function defined above. */ -mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask, - struct mca_summary *sp, cpu_banks_t* clear_bank) +mctelem_cookie_t mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, + struct mca_summary *sp, struct mca_banks* clear_bank) { uint64_t gstatus, status; struct mcinfo_global *mig = NULL; /* on stack */ @@ -263,7 +285,7 @@ mctelem_cookie_t mcheck_mca_logout(enum struct mcinfo_bank *mib; /* on stack */ /* Skip bank if corresponding bit in bankmask is clear */ - if (!test_bit(i, bankmask)) + if (!mcabanks_test(i, bankmask)) continue; mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); @@ -318,7 +340,7 @@ mctelem_cookie_t mcheck_mca_logout(enum /* Clear status */ mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); else if ( who == MCA_MCE_SCAN && need_clear) - set_bit(i, clear_bank); + mcabanks_set(i, clear_bank); wmb(); } @@ -352,7 +374,7 @@ mctelem_cookie_t mcheck_mca_logout(enum /* Shared #MC handler. 
*/ void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code, - cpu_banks_t bankmask) + struct mca_banks *bankmask) { int xen_state_lost, dom0_state_lost, domU_state_lost; struct vcpu *v = current; @@ -568,13 +590,13 @@ cmn_handler_done: } } -void mcheck_mca_clearbanks(cpu_banks_t bankmask) +void mcheck_mca_clearbanks(struct mca_banks *bankmask) { int i; uint64_t status; for (i = 0; i < 32 && i < nr_mce_banks; i++) { - if (!test_bit(i, bankmask)) + if (!mcabanks_test(i, bankmask)) continue; mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); if (!(status & MCi_STATUS_VAL)) @@ -613,21 +635,6 @@ int mce_available(struct cpuinfo_x86 *c) return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } -static int mce_is_broadcast(struct cpuinfo_x86 *c) -{ - if (mce_force_broadcast) - return 1; - - /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with - * DisplayFamily_DisplayModel encoding of 06H_EH and above, - * a MCA signal is broadcast to all logical processors in the system - */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 && - c->x86_model >= 0xe) - return 1; - return 0; -} - /* * Check if bank 0 is usable for MCE. It isn''t for AMD K7, * and Intel P6 family before model 0x1a. 
@@ -645,77 +652,9 @@ int mce_firstbank(struct cpuinfo_x86 *c) return 0; } -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - int i, broadcast; - enum mcheck_type inited = mcheck_none; +int show_mca_info(int inited, struct cpuinfo_x86 *c) +{ static enum mcheck_type g_type = mcheck_unset; - static int broadcast_check; - - if (mce_disabled == 1) { - dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n"); - return; - } - - broadcast = mce_is_broadcast(c); - if (broadcast_check && (broadcast != mce_broadcast) ) - dprintk(XENLOG_INFO, - "CPUs have mixed broadcast support" - "may cause undetermined result!!!\n"); - - broadcast_check = 1; - if (broadcast) - mce_broadcast = broadcast; - - for (i = 0; i < MAX_NR_BANKS; i++) - set_bit(i,mca_allbanks); - - /* Enforce at least MCE support in CPUID information. Individual - * families may also need to enforce a check for MCA support. */ - if (!cpu_has(c, X86_FEATURE_MCE)) { - printk(XENLOG_INFO "CPU%i: No machine check support available\n", - smp_processor_id()); - return; - } - - intpose_init(); - mctelem_init(sizeof (struct mc_info)); - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - inited = amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - switch (c->x86) { - case 6: - case 15: - inited = intel_mcheck_init(c); - break; - } - break; - - default: - break; - } - - if ( !h_mci_ctrl ) - { - h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); - if (!h_mci_ctrl) - { - dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); - return; - } - /* Don''t care banks before firstbank */ - memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); - for (i = firstbank; i < nr_mce_banks; i++) - rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); - } - if (g_mcg_cap & MCG_CTL_P) - rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); - set_poll_bankmask(c); if (inited != g_type) { char prefix[20]; @@ -744,32 +683,130 @@ void mcheck_init(struct cpuinfo_x86 *c) printk("%sNo machine check initialization\n", prefix); break; } 
- - g_type = inited; - } -} - -u64 mce_cap_init(void) + g_type = inited; + } + + return 0; +} + +int set_poll_bankmask(struct cpuinfo_x86 *c) +{ + int cpu = smp_processor_id(); + struct mca_banks *mb; + + mb = mcabanks_alloc(); + if (!mb) + return -ENOMEM; + + if (cmci_support && !mce_disabled) { + mb->num = per_cpu(no_cmci_banks, cpu)->num; + bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map, + nr_mce_banks); + } + else { + bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks); + if (mce_firstbank(c)) + mcabanks_clear(0, mb); + } + per_cpu(poll_bankmask, cpu) = mb; + + return 0; +} + +/* The perbank ctl/status init is platform specific because of AMD''s quirk */ +int mca_cap_init(void) { u32 l, h; u64 value; rdmsr(MSR_IA32_MCG_CAP, l, h); value = ((u64)h << 32) | l; - /* For Guest vMCE usage */ - g_mcg_cap = value & ~MCG_CMCI_P; if (l & MCG_CTL_P) /* Control register present ? */ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + if (nr_mce_banks && (l & MCG_CAP_COUNT) != nr_mce_banks) + { + dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n", + smp_processor_id()); + return -ENODEV; + } nr_mce_banks = l & MCG_CAP_COUNT; - if ( nr_mce_banks > MAX_NR_BANKS ) + + /* mcabanks_alloc depends on nr_mcebanks */ + if (!mca_allbanks) { - printk(KERN_WARNING "MCE: exceed max mce banks\n"); - g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS; + int i; + + mca_allbanks = mcabanks_alloc(); + for ( i = 0; i < nr_mce_banks; i++) + mcabanks_set(i, mca_allbanks); } - return value; + return mca_allbanks ? 
0:-ENOMEM; +} + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + enum mcheck_type inited = mcheck_none; + + if (mce_disabled == 1) { + dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n"); + return; + } + + if (!mce_available(c)) + { + printk(XENLOG_INFO "CPU%i: No machine check support available\n", + smp_processor_id()); + return; + } + + /*Hardware Enable */ + if (mca_cap_init()) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + inited = amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + switch (c->x86) { + case 6: + case 15: + inited = intel_mcheck_init(c); + break; + } + break; + + default: + break; + } + + show_mca_info(inited, c); + if (inited == mcheck_none || inited == mcheck_unset) + goto out; + + intpose_init(); + + mctelem_init(sizeof(struct mc_info)); + + vmce_init(c); + + /* Turn on MCE now */ + set_in_cr4(X86_CR4_MCE); + + set_poll_bankmask(c); + + return; +out: + if (smp_processor_id() == 0) + { + mcabanks_free(mca_allbanks); + mca_allbanks = NULL; + } } static void mcinfo_clear(struct mc_info *mi) @@ -1040,23 +1077,6 @@ void intpose_inval(unsigned int cpu_nr, (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ -int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) -{ - int bank_nr; - - if ( !bank || !d || !h_mci_ctrl ) - return 1; - - /* Will MCE happen in host if If host mcg_ctl is 0? 
*/ - if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) - return 1; - - bank_nr = bank->mc_bank; - if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) - return 1; - return 0; -} - static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) { struct cpuinfo_x86 *c; @@ -1481,19 +1501,6 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u return ret; } -void set_poll_bankmask(struct cpuinfo_x86 *c) -{ - - if (cmci_support && !mce_disabled) { - memcpy(&(__get_cpu_var(poll_bankmask)), - &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t)); - } - else { - memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t)); - if (mce_firstbank(c)) - clear_bit(0, get_cpu_var(poll_bankmask)); - } -} void mc_panic(char *s) { is_mc_panic = 1; diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.h --- a/xen/arch/x86/cpu/mcheck/mce.h Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Jun 07 16:41:43 2010 +0800 @@ -72,7 +72,7 @@ extern void x86_mce_vector_register(x86_ /* Common generic MCE handler that implementations may nominate * via x86_mce_vector_register. */ -extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t); +extern void mcheck_cmn_handler(struct cpu_user_regs *, long, struct mca_banks *); /* Register a handler for judging whether mce is recoverable. 
*/ typedef int (*mce_recoverable_t)(u64 status); @@ -126,18 +126,17 @@ struct mca_summary { uint32_t recoverable; }; -extern cpu_banks_t mca_allbanks; -void set_poll_bankmask(struct cpuinfo_x86 *c); -DECLARE_PER_CPU(cpu_banks_t, poll_bankmask); -DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks); +DECLARE_PER_CPU(struct mca_banks *, poll_bankmask); +DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks); + extern int cmci_support; extern int ser_support; extern int is_mc_panic; extern int mce_broadcast; -extern void mcheck_mca_clearbanks(cpu_banks_t); +extern void mcheck_mca_clearbanks(struct mca_banks *); -extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t, - struct mca_summary *, cpu_banks_t*); +extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *, + struct mca_summary *, struct mca_banks *); /* Register a callback to be made during bank telemetry logout. * This callback is only available to those machine check handlers @@ -170,10 +169,7 @@ int inject_vmce(struct domain *d); int inject_vmce(struct domain *d); int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global); -extern uint64_t g_mcg_cap; -/* Real value in physical CTL MSR */ -extern uint64_t h_mcg_ctl; -extern uint64_t *h_mci_ctrl; +extern int vmce_init(struct cpuinfo_x86 *c); extern unsigned int nr_mce_banks; diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Jun 07 16:41:43 2010 +0800 @@ -16,10 +16,13 @@ #include "mce.h" #include "x86_mca.h" -DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); -DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); +DEFINE_PER_CPU(struct mca_banks *, mce_banks_owned); +DEFINE_PER_CPU(struct mca_banks *, no_cmci_banks); +DEFINE_PER_CPU(struct mca_banks *, mce_clear_banks); int cmci_support = 0; int ser_support = 0; +static int mce_force_broadcast; +boolean_param("mce_fb", mce_force_broadcast); 
static int nr_intel_ext_msrs = 0; @@ -532,12 +535,14 @@ static void intel_machine_check(struct c uint64_t gstatus; mctelem_cookie_t mctc = NULL; struct mca_summary bs; - cpu_banks_t clear_bank; + struct mca_banks *clear_bank; mce_spin_lock(&mce_logout_lock); - memset( &clear_bank, 0x0, sizeof(cpu_banks_t)); - mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, &clear_bank); + clear_bank = __get_cpu_var(mce_clear_banks); + memset( clear_bank->bank_map, 0x0, + sizeof(long) * BITS_TO_LONGS(clear_bank->num)); + mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank); if (bs.errcnt) { /* dump MCE error */ @@ -703,7 +708,7 @@ static int do_cmci_discover(int i) rdmsrl(msr, val); /* Some other CPU already owns this bank. */ if (val & CMCI_EN) { - clear_bit(i, __get_cpu_var(mce_banks_owned)); + mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); goto out; } @@ -713,12 +718,12 @@ static int do_cmci_discover(int i) if (!(val & CMCI_EN)) { /* This bank does not support CMCI. Polling timer has to handle it. 
*/ - set_bit(i, __get_cpu_var(no_cmci_banks)); + mcabanks_set(i, __get_cpu_var(no_cmci_banks)); return 0; } - set_bit(i, __get_cpu_var(mce_banks_owned)); + mcabanks_set(i, __get_cpu_var(mce_banks_owned)); out: - clear_bit(i, __get_cpu_var(no_cmci_banks)); + mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); return 1; } @@ -734,7 +739,7 @@ static void cmci_discover(void) spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < nr_mce_banks; i++) - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) + if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned))) do_cmci_discover(i); spin_unlock_irqrestore(&cmci_discover_lock, flags); @@ -761,8 +766,8 @@ static void cmci_discover(void) mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", smp_processor_id(), - *((unsigned long *)__get_cpu_var(mce_banks_owned)), - *((unsigned long *)__get_cpu_var(no_cmci_banks))); + *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map), + *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map)); } /* @@ -808,12 +813,12 @@ static void clear_cmci(void) for (i = 0; i < nr_mce_banks; i++) { unsigned msr = MSR_IA32_MC0_CTL2 + i; u64 val; - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) + if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned))) continue; rdmsrl(msr, val); if (val & (CMCI_EN|CMCI_THRESHOLD_MASK)) wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK)); - clear_bit(i, __get_cpu_var(mce_banks_owned)); + mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); } } @@ -882,16 +887,44 @@ fastcall void smp_cmci_interrupt(struct void mce_intel_feature_init(struct cpuinfo_x86 *c) { - #ifdef CONFIG_X86_MCE_THERMAL intel_init_thermal(c); #endif intel_init_cmci(c); } -static void _mce_cap_init(struct cpuinfo_x86 *c) -{ - u32 l = mce_cap_init(); +static int mce_is_broadcast(struct cpuinfo_x86 *c) +{ + if (mce_force_broadcast) + return 1; + + /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with + * DisplayFamily_DisplayModel encoding of 06H_EH and above, + * a 
MCA signal is broadcast to all logical processors in the system + */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 && + c->x86_model >= 0xe) + return 1; + return 0; +} + +static void intel_mca_cap_init(struct cpuinfo_x86 *c) +{ + static int broadcast_check; + int broadcast; + u32 l, h; + + broadcast = mce_is_broadcast(c); + if (broadcast_check && (broadcast != mce_broadcast) ) + dprintk(XENLOG_INFO, + "CPUs have mixed broadcast support" + "may cause undetermined result!!!\n"); + + broadcast_check = 1; + if (broadcast) + mce_broadcast = broadcast; + + rdmsr(MSR_IA32_MCG_CAP, l, h); if ((l & MCG_CMCI_P) && cpu_has_apic) cmci_support = 1; @@ -916,8 +949,6 @@ static void mce_init(void) mctelem_cookie_t mctc; struct mca_summary bs; - clear_in_cr4(X86_CR4_MCE); - mce_barrier_init(&mce_inside_bar); mce_barrier_init(&mce_severity_bar); mce_barrier_init(&mce_trap_bar); @@ -933,8 +964,6 @@ static void mce_init(void) x86_mcinfo_dump(mctelem_dataptr(mctc)); mctelem_commit(mctc); } - - set_in_cr4(X86_CR4_MCE); for (i = firstbank; i < nr_mce_banks; i++) { @@ -953,10 +982,35 @@ static void mce_init(void) wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); } +static int init_mca_banks(void) +{ + struct mca_banks *mb1, *mb2, * mb3; + + mb1 = mcabanks_alloc(); + mb2 = mcabanks_alloc(); + mb3 = mcabanks_alloc(); + if (!mb1 || !mb2 || !mb3) + goto out; + + __get_cpu_var(mce_clear_banks) = mb1; + __get_cpu_var(no_cmci_banks) = mb2; + __get_cpu_var(mce_banks_owned) = mb3; + + return 0; +out: + mcabanks_free(mb1); + mcabanks_free(mb2); + mcabanks_free(mb3); + return -ENOMEM; +} + /* p4/p6 family have similar MCA initialization process */ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c) { - _mce_cap_init(c); + if (init_mca_banks()) + return mcheck_none; + + intel_mca_cap_init(c); /* machine check is available */ x86_mce_vector_register(intel_machine_check); @@ -974,17 +1028,14 @@ enum mcheck_type intel_mcheck_init(struc int intel_mce_wrmsr(uint32_t msr, uint64_t val) { - int ret = 1; 
- - switch ( msr ) + int ret = 0; + + if (msr > MSR_IA32_MC0_CTL2 && + msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1)) { - case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: mce_printk(MCE_QUIET, "We have disabled CMCI capability, " "Guest should not write this MSR!\n"); - break; - default: - ret = 0; - break; + ret = 1; } return ret; @@ -992,17 +1043,14 @@ int intel_mce_wrmsr(uint32_t msr, uint64 int intel_mce_rdmsr(uint32_t msr, uint64_t *val) { - int ret = 1; - - switch ( msr ) + int ret = 0; + + if (msr > MSR_IA32_MC0_CTL2 && + msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1)) { - case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: mce_printk(MCE_QUIET, "We have disabled CMCI capability, " "Guest should not read this MSR!\n"); - break; - default: - ret = 0; - break; + ret = 1; } return ret; diff -r 45321a57873a xen/arch/x86/cpu/mcheck/non-fatal.c --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Jun 07 16:41:43 2010 +0800 @@ -22,7 +22,7 @@ #include "mce.h" -DEFINE_PER_CPU(cpu_banks_t, poll_bankmask); +DEFINE_PER_CPU(struct mca_banks *, poll_bankmask); static struct timer mce_timer; #define MCE_PERIOD MILLISECS(8000) @@ -94,6 +94,9 @@ static int __init init_nonfatal_mce_chec if (mce_disabled || !mce_available(c)) return -ENODEV; + if ( __get_cpu_var(poll_bankmask) == NULL ) + return -EINVAL; + /* * Check for non-fatal errors every MCE_RATE s */ diff -r 45321a57873a xen/arch/x86/cpu/mcheck/vmce.c --- a/xen/arch/x86/cpu/mcheck/vmce.c Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/vmce.c Mon Jun 07 16:41:43 2010 +0800 @@ -20,6 +20,12 @@ #define dom_vmce(x) ((x)->arch.vmca_msrs) +uint64_t g_mcg_cap; + +/* Real value in physical CTL MSR */ +uint64_t h_mcg_ctl = 0UL; +uint64_t *h_mci_ctrl; + int vmce_init_msr(struct domain *d) { dom_vmce(d) = xmalloc(struct domain_mca_msrs); @@ -431,3 +437,50 @@ int vmce_domain_inject( return inject_vmce(d); } +int vmce_init(struct 
cpuinfo_x86 *c) +{ + u32 l, h; + u64 value; + int i; + + if ( !h_mci_ctrl ) + { + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); + if (!h_mci_ctrl) + { + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); + return -ENOMEM; + } + /* Don''t care banks before firstbank */ + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); + for (i = firstbank; i < nr_mce_banks; i++) + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); + } + + if (g_mcg_cap & MCG_CTL_P) + rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); + + rdmsr(MSR_IA32_MCG_CAP, l, h); + value = ((u64)h << 32) | l; + /* For Guest vMCE usage */ + g_mcg_cap = value & ~MCG_CMCI_P; + + return 0; +} + +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) +{ + int bank_nr; + + if ( !bank || !d || !h_mci_ctrl ) + return 1; + + /* Will MCE happen in host if If host mcg_ctl is 0? */ + if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) + return 1; + + bank_nr = bank->mc_bank; + if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) + return 1; + return 0; +} diff -r 45321a57873a xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:43 2010 +0800 @@ -89,8 +89,37 @@ #define CMCI_THRESHOLD 0x2 #include <asm/domain.h> -typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS); -DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned); + +struct mca_banks +{ + int num; + unsigned long *bank_map; +}; + +static inline void mcabanks_clear(int bit, struct mca_banks *banks) \ +{ + if (!banks || !banks->bank_map || bit >= banks->num) + return ; + clear_bit(bit, banks->bank_map); +} + +static inline void mcabanks_set(int bit, struct mca_banks* banks) +{ + if (!banks || !banks->bank_map || bit >= banks->num) + return; + set_bit(bit, banks->bank_map); +} + +static inline int mcabanks_test(int bit, struct mca_banks* banks) +{ + if (!banks || !banks->bank_map || bit >= banks->num) + return 0; + return test_bit(bit, banks->bank_map); +} + +struct 
mca_banks *mcabanks_alloc(void); +void mcabanks_free(struct mca_banks *banks); +extern struct mca_banks *mca_allbanks; /* Below interfaces are defined for MCA internal processing: * a. pre_handler will be called early in MCA ISR context, mainly for early diff -r 45321a57873a xen/include/asm-x86/mce.h --- a/xen/include/asm-x86/mce.h Mon Jun 07 16:41:39 2010 +0800 +++ b/xen/include/asm-x86/mce.h Mon Jun 07 16:41:43 2010 +0800 @@ -2,8 +2,6 @@ #include <public/arch-x86/xen-mca.h> #ifndef _XEN_X86_MCE_H #define _XEN_X86_MCE_H -/* Define for GUEST MCA handling */ -#define MAX_NR_BANKS 30 /* This entry is for recording bank nodes for the impacted domain, * put into impact_header list. */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Christoph Egger
2010-Jun-08 16:12 UTC
[Xen-devel] Re: [PATCH] Clean-up mcheck_init handler
Acked-By: Christoph Egger <Christoph.Egger@amd.com> On Monday 07 June 2010 10:43:12 Jiang, Yunhong wrote:> This patch cleaned the mcheck_init. > > Firstly and most importantly, the maxium MCA banks are hard-coded as > MAX_NR_BANKS, which is 30. This is not architecture correct. This patch > removes this definition, replacig the cpu_banks_t with mca_banks, and > provide some basic function, like set/clear/test/alloc/free for mcabanks_t. > > Secondly, remove the broadcast_check code to intel specific, since only > Intel platform support broadcast now. > > Thirdly, the X86_FEATURE_MCA check and CR4_MCE enable is done in every > vendor-specifc callback, that''s redundant, move it to mcheck_init. Also, we > should enable CR4_MCE only in the end of the mcheck_init, to close the > small window between CR4_enable and the mca setup. > > And we also move vmce specific code to vmce.c as vmce_init, to make code > clean. > > arch/x86/cpu/mcheck/amd_k8.c | 7 > arch/x86/cpu/mcheck/k7.c | 12 - > arch/x86/cpu/mcheck/mce.c | 293 > ++++++++++++++++++++-------------------- arch/x86/cpu/mcheck/mce.h | > 20 +- > arch/x86/cpu/mcheck/mce_intel.c | 126 +++++++++++------ > arch/x86/cpu/mcheck/non-fatal.c | 5 > arch/x86/cpu/mcheck/vmce.c | 53 +++++++ > arch/x86/cpu/mcheck/x86_mca.h | 33 ++++ > include/asm-x86/mce.h | 2 > 9 files changed, 333 insertions(+), 218 deletions(-) > > Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com> > > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/amd_k8.c > --- a/xen/arch/x86/cpu/mcheck/amd_k8.c Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Mon Jun 07 16:41:43 2010 +0800 > @@ -81,13 +81,8 @@ enum mcheck_type amd_k8_mcheck_init(stru > uint32_t i; > enum mcequirk_amd_flags quirkflag; > > - /* Check for PPro style MCA; our caller has confirmed MCE support. 
> */ - if (!cpu_has(c, X86_FEATURE_MCA)) > - return mcheck_none; > - > quirkflag = mcequirk_lookup_amd_quirkdata(c); > > - mce_cap_init(); > x86_mce_vector_register(k8_machine_check); > > for (i = 0; i < nr_mce_banks; i++) { > @@ -101,7 +96,5 @@ enum mcheck_type amd_k8_mcheck_init(stru > } > } > > - set_in_cr4(X86_CR4_MCE); > - > return mcheck_amd_k8; > } > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/k7.c > --- a/xen/arch/x86/cpu/mcheck/k7.c Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/k7.c Mon Jun 07 16:41:43 2010 +0800 > @@ -70,19 +70,9 @@ static fastcall void k7_machine_check(st > /* AMD K7 machine check */ > enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c) > { > - u32 l, h; > int i; > > - /* Check for PPro style MCA; our caller has confirmed MCE support. > */ - if (!cpu_has(c, X86_FEATURE_MCA)) > - return mcheck_none; > - > x86_mce_vector_register(k7_machine_check); > - > - rdmsr (MSR_IA32_MCG_CAP, l, h); > - if (l & (1<<8)) /* Control register present ? */ > - wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); > - nr_mce_banks = l & 0xff; > > /* Clear status for MC index 0 separately, we don''t touch CTL, > * as some Athlons cause spurious MCEs when its enabled. 
*/ > @@ -92,7 +82,5 @@ enum mcheck_type amd_k7_mcheck_init(stru > wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); > } > > - set_in_cr4 (X86_CR4_MCE); > - > return mcheck_amd_k7; > } > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.c > --- a/xen/arch/x86/cpu/mcheck/mce.c Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Jun 07 16:43:46 2010 +0800 > @@ -25,21 +25,15 @@ > > int mce_disabled; > invbool_param("mce", mce_disabled); > -static int mce_force_broadcast; > -boolean_param("mce_fb", mce_force_broadcast); > int is_mc_panic; > unsigned int nr_mce_banks; > > int mce_broadcast = 0; > -uint64_t g_mcg_cap; > - > -/* Real value in physical CTL MSR */ > -uint64_t h_mcg_ctl = 0UL; > -uint64_t *h_mci_ctrl; > int firstbank; > > static void intpose_init(void); > static void mcinfo_clear(struct mc_info *); > +struct mca_banks *mca_allbanks; > > #define SEG_PL(segsel) ((segsel) & 0x3) > #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16) > @@ -54,8 +48,6 @@ static int x86_mcerr(const char *msg, in > #else > #define x86_mcerr(msg, err) (err) > #endif > - > -cpu_banks_t mca_allbanks; > > int mce_verbosity; > static void __init mce_set_verbosity(char *str) > @@ -113,6 +105,36 @@ void mce_recoverable_register(mce_recove > mc_recoverable_scan = cbfunc; > } > > +struct mca_banks *mcabanks_alloc(void) > +{ > + struct mca_banks *mb; > + > + mb = xmalloc(struct mca_banks); > + if (!mb) > + return NULL; > + > + mb->bank_map = xmalloc_array(unsigned long, > + BITS_TO_LONGS(nr_mce_banks)); > + if (!mb->bank_map) > + { > + xfree(mb); > + return NULL; > + } > + > + mb->num = nr_mce_banks; > + memset(mb->bank_map, 0, sizeof(long) * BITS_TO_LONGS(nr_mce_banks)); > + > + return mb; > +} > + > +void mcabanks_free(struct mca_banks *banks) > +{ > + if (banks == NULL) > + return; > + if (banks->bank_map) > + xfree(banks->bank_map); > + xfree(banks); > +} > /* Judging whether to Clear Machine Check error bank callback handler > * According to Intel latest MCA OS Recovery Writer''s 
Guide, > * whether the error MCA bank needs to be cleared is decided by the > mca_source @@ -218,8 +240,8 @@ static int mca_init_global(uint32_t flag > * For Intel latest CPU, whether to clear the error bank status needs to > * be judged by the callback function defined above. > */ > -mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t > bankmask, - struct mca_summary *sp, cpu_banks_t* clear_bank) > +mctelem_cookie_t mcheck_mca_logout(enum mca_source who, struct mca_banks > *bankmask, + struct mca_summary *sp, struct mca_banks* clear_bank) > { > uint64_t gstatus, status; > struct mcinfo_global *mig = NULL; /* on stack */ > @@ -263,7 +285,7 @@ mctelem_cookie_t mcheck_mca_logout(enum > struct mcinfo_bank *mib; /* on stack */ > > /* Skip bank if corresponding bit in bankmask is clear */ > - if (!test_bit(i, bankmask)) > + if (!mcabanks_test(i, bankmask)) > continue; > > mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); > @@ -318,7 +340,7 @@ mctelem_cookie_t mcheck_mca_logout(enum > /* Clear status */ > mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); > else if ( who == MCA_MCE_SCAN && need_clear) > - set_bit(i, clear_bank); > + mcabanks_set(i, clear_bank); > > wmb(); > } > @@ -352,7 +374,7 @@ mctelem_cookie_t mcheck_mca_logout(enum > > /* Shared #MC handler. 
*/ > void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code, > - cpu_banks_t bankmask) > + struct mca_banks *bankmask) > { > int xen_state_lost, dom0_state_lost, domU_state_lost; > struct vcpu *v = current; > @@ -568,13 +590,13 @@ cmn_handler_done: > } > } > > -void mcheck_mca_clearbanks(cpu_banks_t bankmask) > +void mcheck_mca_clearbanks(struct mca_banks *bankmask) > { > int i; > uint64_t status; > > for (i = 0; i < 32 && i < nr_mce_banks; i++) { > - if (!test_bit(i, bankmask)) > + if (!mcabanks_test(i, bankmask)) > continue; > mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); > if (!(status & MCi_STATUS_VAL)) > @@ -613,21 +635,6 @@ int mce_available(struct cpuinfo_x86 *c) > return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); > } > > -static int mce_is_broadcast(struct cpuinfo_x86 *c) > -{ > - if (mce_force_broadcast) > - return 1; > - > - /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with > - * DisplayFamily_DisplayModel encoding of 06H_EH and above, > - * a MCA signal is broadcast to all logical processors in the system > - */ > - if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 && > - c->x86_model >= 0xe) > - return 1; > - return 0; > -} > - > /* > * Check if bank 0 is usable for MCE. It isn't for AMD K7, > * and Intel P6 family before model 0x1a. 
> @@ -645,77 +652,9 @@ int mce_firstbank(struct cpuinfo_x86 *c) > return 0; > } > > -/* This has to be run for each processor */ > -void mcheck_init(struct cpuinfo_x86 *c) > -{ > - int i, broadcast; > - enum mcheck_type inited = mcheck_none; > +int show_mca_info(int inited, struct cpuinfo_x86 *c) > +{ > static enum mcheck_type g_type = mcheck_unset; > - static int broadcast_check; > - > - if (mce_disabled == 1) { > - dprintk(XENLOG_INFO, "MCE support disabled by > bootparam\n"); - return; > - } > - > - broadcast = mce_is_broadcast(c); > - if (broadcast_check && (broadcast != mce_broadcast) ) > - dprintk(XENLOG_INFO, > - "CPUs have mixed broadcast support" > - "may cause undetermined result!!!\n"); > - > - broadcast_check = 1; > - if (broadcast) > - mce_broadcast = broadcast; > - > - for (i = 0; i < MAX_NR_BANKS; i++) > - set_bit(i,mca_allbanks); > - > - /* Enforce at least MCE support in CPUID information. Individual > - * families may also need to enforce a check for MCA support. */ > - if (!cpu_has(c, X86_FEATURE_MCE)) { > - printk(XENLOG_INFO "CPU%i: No machine check support > available\n", - smp_processor_id()); > - return; > - } > - > - intpose_init(); > - mctelem_init(sizeof (struct mc_info)); > - > - switch (c->x86_vendor) { > - case X86_VENDOR_AMD: > - inited = amd_mcheck_init(c); > - break; > - > - case X86_VENDOR_INTEL: > - switch (c->x86) { > - case 6: > - case 15: > - inited = intel_mcheck_init(c); > - break; > - } > - break; > - > - default: > - break; > - } > - > - if ( !h_mci_ctrl ) > - { > - h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); > - if (!h_mci_ctrl) > - { > - dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); > - return; > - } > - /* Don't care banks before firstbank */ > - memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); > - for (i = firstbank; i < nr_mce_banks; i++) > - rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); > - } > - if (g_mcg_cap & MCG_CTL_P) > - rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); > - set_poll_bankmask(c); > > if (inited 
!= g_type) { > char prefix[20]; > @@ -744,32 +683,130 @@ void mcheck_init(struct cpuinfo_x86 *c) > printk("%sNo machine check initialization\n", > prefix); break; > } > - > - g_type = inited; > - } > -} > - > -u64 mce_cap_init(void) > + g_type = inited; > + } > + > + return 0; > +} > + > +int set_poll_bankmask(struct cpuinfo_x86 *c) > +{ > + int cpu = smp_processor_id(); > + struct mca_banks *mb; > + > + mb = mcabanks_alloc(); > + if (!mb) > + return -ENOMEM; > + > + if (cmci_support && !mce_disabled) { > + mb->num = per_cpu(no_cmci_banks, cpu)->num; > + bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map, > + nr_mce_banks); > + } > + else { > + bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks); > + if (mce_firstbank(c)) > + mcabanks_clear(0, mb); > + } > + per_cpu(poll_bankmask, cpu) = mb; > + > + return 0; > +} > + > +/* The perbank ctl/status init is platform specific because of AMD's quirk > */ +int mca_cap_init(void) > { > u32 l, h; > u64 value; > > rdmsr(MSR_IA32_MCG_CAP, l, h); > value = ((u64)h << 32) | l; > - /* For Guest vMCE usage */ > - g_mcg_cap = value & ~MCG_CMCI_P; > > if (l & MCG_CTL_P) /* Control register present ? */ > wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); > > + if (nr_mce_banks && (l & MCG_CAP_COUNT) != nr_mce_banks) > + { > + dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n", > + smp_processor_id()); > + return -ENODEV; > + } > nr_mce_banks = l & MCG_CAP_COUNT; > - if ( nr_mce_banks > MAX_NR_BANKS ) > + > + /* mcabanks_alloc depends on nr_mcebanks */ > + if (!mca_allbanks) > { > - printk(KERN_WARNING "MCE: exceed max mce banks\n"); > - g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS; > + int i; > + > + mca_allbanks = mcabanks_alloc(); > + for ( i = 0; i < nr_mce_banks; i++) > + mcabanks_set(i, mca_allbanks); > } > > - return value; > + return mca_allbanks ? 
0:-ENOMEM; > +} > + > +/* This has to be run for each processor */ > +void mcheck_init(struct cpuinfo_x86 *c) > +{ > + enum mcheck_type inited = mcheck_none; > + > + if (mce_disabled == 1) { > + dprintk(XENLOG_INFO, "MCE support disabled by > bootparam\n"); + return; > + } > + > + if (!mce_available(c)) > + { > + printk(XENLOG_INFO "CPU%i: No machine check support > available\n", + smp_processor_id()); > + return; > + } > + > + /*Hardware Enable */ > + if (mca_cap_init()) > + return; > + > + switch (c->x86_vendor) { > + case X86_VENDOR_AMD: > + inited = amd_mcheck_init(c); > + break; > + > + case X86_VENDOR_INTEL: > + switch (c->x86) { > + case 6: > + case 15: > + inited = intel_mcheck_init(c); > + break; > + } > + break; > + > + default: > + break; > + } > + > + show_mca_info(inited, c); > + if (inited == mcheck_none || inited == mcheck_unset) > + goto out; > + > + intpose_init(); > + > + mctelem_init(sizeof(struct mc_info)); > + > + vmce_init(c); > + > + /* Turn on MCE now */ > + set_in_cr4(X86_CR4_MCE); > + > + set_poll_bankmask(c); > + > + return; > +out: > + if (smp_processor_id() == 0) > + { > + mcabanks_free(mca_allbanks); > + mca_allbanks = NULL; > + } > } > > static void mcinfo_clear(struct mc_info *mi) > @@ -1040,23 +1077,6 @@ void intpose_inval(unsigned int cpu_nr, > (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \ > ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */ > > -int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) > -{ > - int bank_nr; > - > - if ( !bank || !d || !h_mci_ctrl ) > - return 1; > - > - /* Will MCE happen in host if If host mcg_ctl is 0? 
*/ > - if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) > - return 1; > - > - bank_nr = bank->mc_bank; > - if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > - return 1; > - return 0; > -} > - > static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci) > { > struct cpuinfo_x86 *c; > @@ -1481,19 +1501,6 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u > return ret; > } > > -void set_poll_bankmask(struct cpuinfo_x86 *c) > -{ > - > - if (cmci_support && !mce_disabled) { > - memcpy(&(__get_cpu_var(poll_bankmask)), > - &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t)); > - } > - else { > - memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, > sizeof(cpu_banks_t)); - if (mce_firstbank(c)) > - clear_bit(0, get_cpu_var(poll_bankmask)); > - } > -} > void mc_panic(char *s) > { > is_mc_panic = 1; > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.h > --- a/xen/arch/x86/cpu/mcheck/mce.h Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Jun 07 16:41:43 2010 +0800 > @@ -72,7 +72,7 @@ extern void x86_mce_vector_register(x86_ > > /* Common generic MCE handler that implementations may nominate > * via x86_mce_vector_register. */ > -extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t); > +extern void mcheck_cmn_handler(struct cpu_user_regs *, long, struct > mca_banks *); > > /* Register a handler for judging whether mce is recoverable. 
*/ > typedef int (*mce_recoverable_t)(u64 status); > @@ -126,18 +126,17 @@ struct mca_summary { > uint32_t recoverable; > }; > > -extern cpu_banks_t mca_allbanks; > -void set_poll_bankmask(struct cpuinfo_x86 *c); > -DECLARE_PER_CPU(cpu_banks_t, poll_bankmask); > -DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks); > +DECLARE_PER_CPU(struct mca_banks *, poll_bankmask); > +DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks); > + > extern int cmci_support; > extern int ser_support; > extern int is_mc_panic; > extern int mce_broadcast; > -extern void mcheck_mca_clearbanks(cpu_banks_t); > +extern void mcheck_mca_clearbanks(struct mca_banks *); > > -extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t, > - struct mca_summary *, cpu_banks_t*); > +extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct > mca_banks *, + struct mca_summary *, struct mca_banks *); > > /* Register a callback to be made during bank telemetry logout. > * This callback is only available to those machine check handlers > @@ -170,10 +169,7 @@ int inject_vmce(struct domain *d); > int inject_vmce(struct domain *d); > int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct > mcinfo_global *global); > > -extern uint64_t g_mcg_cap; > -/* Real value in physical CTL MSR */ > -extern uint64_t h_mcg_ctl; > -extern uint64_t *h_mci_ctrl; > +extern int vmce_init(struct cpuinfo_x86 *c); > > extern unsigned int nr_mce_banks; > > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce_intel.c > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Jun 07 16:41:39 2010 > +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Jun 07 16:41:43 > 2010 +0800 @@ -16,10 +16,13 @@ > #include "mce.h" > #include "x86_mca.h" > > -DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); > -DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); > +DEFINE_PER_CPU(struct mca_banks *, mce_banks_owned); > +DEFINE_PER_CPU(struct mca_banks *, no_cmci_banks); > +DEFINE_PER_CPU(struct mca_banks *, mce_clear_banks); > int cmci_support = 
0; > int ser_support = 0; > +static int mce_force_broadcast; > +boolean_param("mce_fb", mce_force_broadcast); > > static int nr_intel_ext_msrs = 0; > > @@ -532,12 +535,14 @@ static void intel_machine_check(struct c > uint64_t gstatus; > mctelem_cookie_t mctc = NULL; > struct mca_summary bs; > - cpu_banks_t clear_bank; > + struct mca_banks *clear_bank; > > mce_spin_lock(&mce_logout_lock); > > - memset( &clear_bank, 0x0, sizeof(cpu_banks_t)); > - mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, > &clear_bank); + clear_bank = __get_cpu_var(mce_clear_banks); > + memset( clear_bank->bank_map, 0x0, > + sizeof(long) * BITS_TO_LONGS(clear_bank->num)); > + mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank); > > if (bs.errcnt) { > /* dump MCE error */ > @@ -703,7 +708,7 @@ static int do_cmci_discover(int i) > rdmsrl(msr, val); > /* Some other CPU already owns this bank. */ > if (val & CMCI_EN) { > - clear_bit(i, __get_cpu_var(mce_banks_owned)); > + mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); > goto out; > } > > @@ -713,12 +718,12 @@ static int do_cmci_discover(int i) > > if (!(val & CMCI_EN)) { > /* This bank does not support CMCI. Polling timer has to handle > it. 
*/ - set_bit(i, __get_cpu_var(no_cmci_banks)); > + mcabanks_set(i, __get_cpu_var(no_cmci_banks)); > return 0; > } > - set_bit(i, __get_cpu_var(mce_banks_owned)); > + mcabanks_set(i, __get_cpu_var(mce_banks_owned)); > out: > - clear_bit(i, __get_cpu_var(no_cmci_banks)); > + mcabanks_clear(i, __get_cpu_var(no_cmci_banks)); > return 1; > } > > @@ -734,7 +739,7 @@ static void cmci_discover(void) > spin_lock_irqsave(&cmci_discover_lock, flags); > > for (i = 0; i < nr_mce_banks; i++) > - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) > + if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned))) > do_cmci_discover(i); > > spin_unlock_irqrestore(&cmci_discover_lock, flags); > @@ -761,8 +766,8 @@ static void cmci_discover(void) > > mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], > no_cmci_map[%lx]\n", smp_processor_id(), > - *((unsigned long *)__get_cpu_var(mce_banks_owned)), > - *((unsigned long *)__get_cpu_var(no_cmci_banks))); > + *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map), > + *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map)); > } > > /* > @@ -808,12 +813,12 @@ static void clear_cmci(void) > for (i = 0; i < nr_mce_banks; i++) { > unsigned msr = MSR_IA32_MC0_CTL2 + i; > u64 val; > - if (!test_bit(i, __get_cpu_var(mce_banks_owned))) > + if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned))) > continue; > rdmsrl(msr, val); > if (val & (CMCI_EN|CMCI_THRESHOLD_MASK)) > wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK)); > - clear_bit(i, __get_cpu_var(mce_banks_owned)); > + mcabanks_clear(i, __get_cpu_var(mce_banks_owned)); > } > } > > @@ -882,16 +887,44 @@ fastcall void smp_cmci_interrupt(struct > > void mce_intel_feature_init(struct cpuinfo_x86 *c) > { > - > #ifdef CONFIG_X86_MCE_THERMAL > intel_init_thermal(c); > #endif > intel_init_cmci(c); > } > > -static void _mce_cap_init(struct cpuinfo_x86 *c) > -{ > - u32 l = mce_cap_init(); > +static int mce_is_broadcast(struct cpuinfo_x86 *c) > +{ > + if (mce_force_broadcast) > + return 1; > + > + /* 
According to Intel SDM Dec, 2009, 15.10.4.1, For processors with > + * DisplayFamily_DisplayModel encoding of 06H_EH and above, > + * a MCA signal is broadcast to all logical processors in the system > + */ > + if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 && > + c->x86_model >= 0xe) > + return 1; > + return 0; > +} > + > +static void intel_mca_cap_init(struct cpuinfo_x86 *c) > +{ > + static int broadcast_check; > + int broadcast; > + u32 l, h; > + > + broadcast = mce_is_broadcast(c); > + if (broadcast_check && (broadcast != mce_broadcast) ) > + dprintk(XENLOG_INFO, > + "CPUs have mixed broadcast support" > + "may cause undetermined result!!!\n"); > + > + broadcast_check = 1; > + if (broadcast) > + mce_broadcast = broadcast; > + > + rdmsr(MSR_IA32_MCG_CAP, l, h); > > if ((l & MCG_CMCI_P) && cpu_has_apic) > cmci_support = 1; > @@ -916,8 +949,6 @@ static void mce_init(void) > mctelem_cookie_t mctc; > struct mca_summary bs; > > - clear_in_cr4(X86_CR4_MCE); > - > mce_barrier_init(&mce_inside_bar); > mce_barrier_init(&mce_severity_bar); > mce_barrier_init(&mce_trap_bar); > @@ -933,8 +964,6 @@ static void mce_init(void) > x86_mcinfo_dump(mctelem_dataptr(mctc)); > mctelem_commit(mctc); > } > - > - set_in_cr4(X86_CR4_MCE); > > for (i = firstbank; i < nr_mce_banks; i++) > { > @@ -953,10 +982,35 @@ static void mce_init(void) > wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); > } > > +static int init_mca_banks(void) > +{ > + struct mca_banks *mb1, *mb2, * mb3; > + > + mb1 = mcabanks_alloc(); > + mb2 = mcabanks_alloc(); > + mb3 = mcabanks_alloc(); > + if (!mb1 || !mb2 || !mb3) > + goto out; > + > + __get_cpu_var(mce_clear_banks) = mb1; > + __get_cpu_var(no_cmci_banks) = mb2; > + __get_cpu_var(mce_banks_owned) = mb3; > + > + return 0; > +out: > + mcabanks_free(mb1); > + mcabanks_free(mb2); > + mcabanks_free(mb3); > + return -ENOMEM; > +} > + > /* p4/p6 family have similar MCA initialization process */ > enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c) > { > - 
_mce_cap_init(c); > + if (init_mca_banks()) > + return mcheck_none; > + > + intel_mca_cap_init(c); > > /* machine check is available */ > x86_mce_vector_register(intel_machine_check); > @@ -974,17 +1028,14 @@ enum mcheck_type intel_mcheck_init(struc > > int intel_mce_wrmsr(uint32_t msr, uint64_t val) > { > - int ret = 1; > - > - switch ( msr ) > + int ret = 0; > + > + if (msr > MSR_IA32_MC0_CTL2 && > + msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1)) > { > - case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: > mce_printk(MCE_QUIET, "We have disabled CMCI capability, " > "Guest should not write this MSR!\n"); > - break; > - default: > - ret = 0; > - break; > + ret = 1; > } > > return ret; > @@ -992,17 +1043,14 @@ int intel_mce_wrmsr(uint32_t msr, uint64 > > int intel_mce_rdmsr(uint32_t msr, uint64_t *val) > { > - int ret = 1; > - > - switch ( msr ) > + int ret = 0; > + > + if (msr > MSR_IA32_MC0_CTL2 && > + msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1)) > { > - case MSR_IA32_MC0_CTL2 ... 
MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: > mce_printk(MCE_QUIET, "We have disabled CMCI capability, " > "Guest should not read this MSR!\n"); > - break; > - default: > - ret = 0; > - break; > + ret = 1; > } > > return ret; > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/non-fatal.c > --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Jun 07 16:41:39 2010 > +0800 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Jun 07 16:41:43 > 2010 +0800 @@ -22,7 +22,7 @@ > > #include "mce.h" > > -DEFINE_PER_CPU(cpu_banks_t, poll_bankmask); > +DEFINE_PER_CPU(struct mca_banks *, poll_bankmask); > static struct timer mce_timer; > > #define MCE_PERIOD MILLISECS(8000) > @@ -94,6 +94,9 @@ static int __init init_nonfatal_mce_chec > if (mce_disabled || !mce_available(c)) > return -ENODEV; > > + if ( __get_cpu_var(poll_bankmask) == NULL ) > + return -EINVAL; > + > /* > * Check for non-fatal errors every MCE_RATE s > */ > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/vmce.c > --- a/xen/arch/x86/cpu/mcheck/vmce.c Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/vmce.c Mon Jun 07 16:41:43 2010 +0800 > @@ -20,6 +20,12 @@ > > #define dom_vmce(x) ((x)->arch.vmca_msrs) > > +uint64_t g_mcg_cap; > + > +/* Real value in physical CTL MSR */ > +uint64_t h_mcg_ctl = 0UL; > +uint64_t *h_mci_ctrl; > + > int vmce_init_msr(struct domain *d) > { > dom_vmce(d) = xmalloc(struct domain_mca_msrs); > @@ -431,3 +437,50 @@ int vmce_domain_inject( > return inject_vmce(d); > } > > +int vmce_init(struct cpuinfo_x86 *c) > +{ > + u32 l, h; > + u64 value; > + int i; > + > + if ( !h_mci_ctrl ) > + { > + h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks); > + if (!h_mci_ctrl) > + { > + dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n"); > + return -ENOMEM; > + } > + /* Don't care banks before firstbank */ > + memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl)); > + for (i = firstbank; i < nr_mce_banks; i++) > + rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]); > + } > + > + if (g_mcg_cap & MCG_CTL_P) > + 
rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl); > + > + rdmsr(MSR_IA32_MCG_CAP, l, h); > + value = ((u64)h << 32) | l; > + /* For Guest vMCE usage */ > + g_mcg_cap = value & ~MCG_CMCI_P; > + > + return 0; > +} > + > +int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d) > +{ > + int bank_nr; > + > + if ( !bank || !d || !h_mci_ctrl ) > + return 1; > + > + /* Will MCE happen in host if If host mcg_ctl is 0? */ > + if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl ) > + return 1; > + > + bank_nr = bank->mc_bank; > + if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] ) > + return 1; > + return 0; > +} > diff -r 45321a57873a xen/arch/x86/cpu/mcheck/x86_mca.h > --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:43 2010 +0800 > @@ -89,8 +89,37 @@ > #define CMCI_THRESHOLD 0x2 > > #include <asm/domain.h> > -typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS); > -DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned); > + > +struct mca_banks > +{ > + int num; > + unsigned long *bank_map; > +}; > + > +static inline void mcabanks_clear(int bit, struct mca_banks *banks) \ > +{ > + if (!banks || !banks->bank_map || bit >= banks->num) > + return ; > + clear_bit(bit, banks->bank_map); > +} > + > +static inline void mcabanks_set(int bit, struct mca_banks* banks) > +{ > + if (!banks || !banks->bank_map || bit >= banks->num) > + return; > + set_bit(bit, banks->bank_map); > +} > + > +static inline int mcabanks_test(int bit, struct mca_banks* banks) > +{ > + if (!banks || !banks->bank_map || bit >= banks->num) > + return 0; > + return test_bit(bit, banks->bank_map); > +} > + > +struct mca_banks *mcabanks_alloc(void); > +void mcabanks_free(struct mca_banks *banks); > +extern struct mca_banks *mca_allbanks; > > /* Below interfaces are defined for MCA internal processing: > * a. 
pre_handler will be called early in MCA ISR context, mainly for > early diff -r 45321a57873a xen/include/asm-x86/mce.h > --- a/xen/include/asm-x86/mce.h Mon Jun 07 16:41:39 2010 +0800 > +++ b/xen/include/asm-x86/mce.h Mon Jun 07 16:41:43 2010 +0800 > @@ -2,8 +2,6 @@ > #include <public/arch-x86/xen-mca.h> > #ifndef _XEN_X86_MCE_H > #define _XEN_X86_MCE_H > -/* Define for GUEST MCA handling */ > -#define MAX_NR_BANKS 30 > > /* This entry is for recording bank nodes for the impacted domain, > * put into impact_header list. */-- ---to satisfy European Law for business letters: Advanced Micro Devices GmbH Einsteinring 24, 85609 Dornach b. Muenchen Geschaeftsfuehrer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 43632 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel