Liu, Jinsong
2011-May-08 11:23 UTC
[Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension This patch simplify mce_action(). Basically the 2 set of mce status flags MCA_... and MCER_... are highly functional similar. This patch remove the redundant middle-level flag MCA_..., and its related data structure and function, so that mce_action() logic is more clean and simpler. This patch also update mem err handler. intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so for the extension of future SRAR case, this patch remove the default fail flag from intel_memerr_dhandler() outside to SRAO/SRAR level, since only SRAO/SRAR handler itself has the knowledge of how to handle the failure when mem err handler fail. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly mce_dh static unsigned int __read_mostly mce_dhandler_num; static unsigned int __read_mostly mce_uhandler_num; -enum mce_result -{ - MCER_NOERROR, - MCER_RECOVERED, - /* Not recoverd, but can continue */ - MCER_CONTINUE, - MCER_RESET, -}; - /* Maybe called in MCE context, no lock, no printk */ static enum mce_result mce_action(struct cpu_user_regs *regs, mctelem_cookie_t mctc) { struct mc_info *local_mi; - enum mce_result ret = MCER_NOERROR; + enum mce_result bank_result = MCER_NOERROR; + enum mce_result worst_result = MCER_NOERROR; struct mcinfo_common *mic = NULL; - struct mca_handle_result mca_res; struct mca_binfo binfo; const struct mca_error_handler *handlers = mce_dhandlers; unsigned int i, handler_num = mce_dhandler_num; @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct /* Processing bank information */ x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); - for ( ; ret != MCER_RESET && mic && mic->size; + for ( ; bank_result != MCER_RESET && mic && mic->size; mic = x86_mcinfo_next(mic) ) { if (mic->type != MC_TYPE_BANK) { @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct } binfo.mib = (struct mcinfo_bank*)mic; binfo.bank = binfo.mib->mc_bank; - memset(&mca_res, 0x0f, sizeof(mca_res)); + bank_result = MCER_NOERROR; for ( i = 0; i < handler_num; i++ ) { if (handlers[i].owned_error(binfo.mib->mc_status)) { - handlers[i].recovery_handler(&binfo, &mca_res); - - if (mca_res.result & MCA_OWNER) - binfo.mib->mc_domid = mca_res.owner; - - if (mca_res.result == MCA_NEED_RESET) - ret = MCER_RESET; - else if (mca_res.result == MCA_RECOVERED) - { - if (ret < MCER_RECOVERED) - ret = MCER_RECOVERED; - } - else if (mca_res.result == MCA_NO_ACTION) - { - if (ret < MCER_CONTINUE) - ret = MCER_CONTINUE; - } + handlers[i].recovery_handler(&binfo, &bank_result); + if (worst_result < bank_result) + worst_result = bank_result; break; } } ASSERT(i != handler_num); } - return ret; + return worst_result; } /* @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ static void intel_memerr_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { struct mcinfo_bank *bank = binfo->mib; struct mcinfo_global *global = binfo->mig; @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( uint64_t mc_status, mc_misc; mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); - result->result = MCA_NEED_RESET; mc_status = bank->mc_status; mc_misc = bank->mc_misc; @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( !(mc_status & MCi_STATUS_MISCV) || ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) { - result->result |= MCA_NO_ACTION; dprintk(XENLOG_WARNING, "No physical address provided for memory error\n"); return; @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( /* This is free page */ if (status & PG_OFFLINE_OFFLINED) - result->result = MCA_RECOVERED; + *result = MCER_RECOVERED; else if (status & PG_OFFLINE_PENDING) { /* This page has owner */ if (status & PG_OFFLINE_OWNED) { - result->result |= MCA_OWNER; - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; mce_printk(MCE_QUIET, "MCE: This error page is ownded" - " by DOM %d\n", result->owner); - /* Fill vMCE# injection and vMCE# MSR virtualization " - * "related data */ - bank->mc_domid = result->owner; + " by DOM %d\n", bank->mc_domid); /* XXX: Cannot handle shared pages yet * (this should identify all domains and gfn mapping to * the mfn in question) */ - BUG_ON( result->owner == DOMID_COW ); - if ( result->owner != DOMID_XEN ) { - d = get_domain_by_id(result->owner); + BUG_ON( bank->mc_domid == DOMID_COW ); + if ( bank->mc_domid != DOMID_XEN ) { + d = get_domain_by_id(bank->mc_domid); ASSERT(d); gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( global->mc_gstatus) == -1 ) { mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d " - "failed\n", result->owner); + "failed\n", bank->mc_domid); goto vmce_failed; } @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( * For xen, it has contained the error and finished * its own recovery job. */ - result->result = MCA_RECOVERED; + *result = MCER_RECOVERED; put_domain(d); return; @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta static void intel_srao_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; /* For unknown srao error code, no action required */ + *result = MCER_CONTINUE; + if ( status & MCi_STATUS_VAL ) { switch ( status & INTEL_MCCOD_MASK ) @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t static void intel_default_mce_dhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( type = intel_check_mce_type(status); if (type == intel_mce_fatal || type == intel_mce_ucr_srar) - result->result = MCA_NEED_RESET; - else if (type == intel_mce_ucr_srao) - result->result = MCA_NO_ACTION; + *result = MCER_RESET; + else + *result = MCER_CONTINUE; } static const struct mca_error_handler intel_mce_dhandlers[] = { @@ -764,7 +737,7 @@ static const struct mca_error_handler in static void intel_default_mce_uhandler( struct mca_binfo *binfo, - struct mca_handle_result *result) + enum mce_result *result) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( /* Panic if no handler for SRAR error */ case intel_mce_ucr_srar: case intel_mce_fatal: - result->result = MCA_NEED_RESET; + *result = MCER_RESET; break; default: - result->result = MCA_NO_ACTION; + *result = MCER_CONTINUE; break; } } diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban void mcabanks_free(struct mca_banks *banks); extern struct mca_banks *mca_allbanks; -/* Below interfaces are defined for MCA internal processing: - * a. pre_handler will be called early in MCA ISR context, mainly for early - * need_reset detection for avoiding log missing. Also, it is used to judge - * impacted DOMAIN if possible. - * b. mca_error_handler is actually a (error_action_index, - * recovery_hanlder pointer) pair. The defined recovery_handler - * performs the actual recovery operations such as page_offline, cpu_offline - * in softIRQ context when the per_bank MCA error matching the corresponding - * mca_code index. If pre_handler can''t judge the impacted domain, - * recovery_handler must figure it out. -*/ - -/* MCA error has been recovered successfully by the recovery action*/ -#define MCA_RECOVERED (0x1 << 0) -/* MCA error impact the specified DOMAIN in owner field below */ -#define MCA_OWNER (0x1 << 1) -/* MCA error can''t be recovered and need reset */ -#define MCA_NEED_RESET (0x1 << 2) -/* MCA error did not have any action yet */ -#define MCA_NO_ACTION (0x1 << 3) - -struct mca_handle_result -{ - uint32_t result; - /* Used one result & MCA_OWNER */ - domid_t owner; - /* Used by mca_error_handler, result & MCA_RECOVRED */ - struct recovery_action *action; -}; - /*Keep bank so that we can get staus even if mib is NULL */ struct mca_binfo { int bank; @@ -163,8 +133,14 @@ struct mca_binfo { struct cpu_user_regs *regs; }; -extern void (*mca_prehandler)( struct cpu_user_regs *regs, - struct mca_handle_result *result); +enum mce_result +{ + MCER_NOERROR, + MCER_RECOVERED, + /* Not recoverd, but can continue */ + MCER_CONTINUE, + MCER_RESET, +}; struct mca_error_handler { @@ -175,7 +151,7 @@ struct mca_error_handler */ int (*owned_error)(uint64_t status); void (*recovery_handler)(struct mca_binfo *binfo, - struct mca_handle_result *result); + enum mce_result *result); }; /* Global variables */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Liu, Jinsong
2011-May-13 14:34 UTC
RE: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
Jan and Keir, Any comments? Thanks Jinsong Liu, Jinsong wrote:> MCA: simplify mce actoin, and some update of mem err handler for > future SRAR extension > > This patch simplify mce_action(). Basically the 2 set of mce status > flags MCA_... and MCER_... are highly functional similar. This patch > remove the redundant middle-level flag MCA_..., and its related data > structure and function, so that mce_action() logic is more clean and > simpler. > This patch also update mem err handler. intel_memerr_dhandler() will > be commonly used by SRAO and SRAR, so for the extension of future > SRAR case, this patch remove the default fail flag from > intel_memerr_dhandler() outside to SRAO/SRAR level, since only > SRAO/SRAR handler itself has the knowledge of how to handle the > failure when mem err handler fail. > > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > > diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 > +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 > 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly > mce_dh static unsigned int __read_mostly mce_dhandler_num; > static unsigned int __read_mostly mce_uhandler_num; > > -enum mce_result > -{ > - MCER_NOERROR, > - MCER_RECOVERED, > - /* Not recoverd, but can continue */ > - MCER_CONTINUE, > - MCER_RESET, > -}; > - > /* Maybe called in MCE context, no lock, no printk */ > static enum mce_result mce_action(struct cpu_user_regs *regs, > mctelem_cookie_t mctc) > { > struct mc_info *local_mi; > - enum mce_result ret = MCER_NOERROR; > + enum mce_result bank_result = MCER_NOERROR; > + enum mce_result worst_result = MCER_NOERROR; > struct mcinfo_common *mic = NULL; > - struct mca_handle_result mca_res; > struct mca_binfo binfo; > const struct mca_error_handler *handlers = mce_dhandlers; > unsigned int i, handler_num = mce_dhandler_num; > @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct > /* Processing bank information */ > x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); > > - for ( ; ret != MCER_RESET && mic && mic->size; > + for ( ; bank_result != MCER_RESET && mic && mic->size; > mic = x86_mcinfo_next(mic) ) > { > if (mic->type != MC_TYPE_BANK) { > @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct > } > binfo.mib = (struct mcinfo_bank*)mic; > binfo.bank = binfo.mib->mc_bank; > - memset(&mca_res, 0x0f, sizeof(mca_res)); > + bank_result = MCER_NOERROR; > for ( i = 0; i < handler_num; i++ ) { > if (handlers[i].owned_error(binfo.mib->mc_status)) > { > - handlers[i].recovery_handler(&binfo, &mca_res); > - > - if (mca_res.result & MCA_OWNER) > - binfo.mib->mc_domid = mca_res.owner; > - > - if (mca_res.result == MCA_NEED_RESET) > - ret = MCER_RESET; > - else if (mca_res.result == MCA_RECOVERED) > - { > - if (ret < MCER_RECOVERED) > - ret = MCER_RECOVERED; > - } > - else if (mca_res.result == MCA_NO_ACTION) > - { > - if (ret < MCER_CONTINUE) > - ret = MCER_CONTINUE; > - } > + handlers[i].recovery_handler(&binfo, &bank_result); > + if (worst_result < bank_result) > + worst_result = bank_result; > break; > } > } > ASSERT(i != handler_num); > } > > - return ret; > + return worst_result; > } > > /* > @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ > > static void intel_memerr_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > struct mcinfo_bank *bank = binfo->mib; > struct mcinfo_global *global = binfo->mig; > @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( > uint64_t mc_status, mc_misc; > > mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); > - result->result = MCA_NEED_RESET; > > mc_status = bank->mc_status; > mc_misc = bank->mc_misc; > @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( > !(mc_status & MCi_STATUS_MISCV) || > ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) > { > - result->result |= MCA_NO_ACTION; > dprintk(XENLOG_WARNING, > "No physical address provided for memory error\n"); > return; > @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( > > /* This is free page */ > if (status & PG_OFFLINE_OFFLINED) > - result->result = MCA_RECOVERED; > + *result = MCER_RECOVERED; > else if (status & PG_OFFLINE_PENDING) { > /* This page has owner */ > if (status & PG_OFFLINE_OWNED) { > - result->result |= MCA_OWNER; > - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; > + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; > mce_printk(MCE_QUIET, "MCE: This error page is ownded" > - " by DOM %d\n", result->owner); > - /* Fill vMCE# injection and vMCE# MSR virtualization " > - * "related data */ > - bank->mc_domid = result->owner; > + " by DOM %d\n", bank->mc_domid); > /* XXX: Cannot handle shared pages yet > * (this should identify all domains and gfn mapping to > * the mfn in question) */ > - BUG_ON( result->owner == DOMID_COW ); > - if ( result->owner != DOMID_XEN ) { > - d = get_domain_by_id(result->owner); > + BUG_ON( bank->mc_domid == DOMID_COW ); > + if ( bank->mc_domid != DOMID_XEN ) { > + d = get_domain_by_id(bank->mc_domid); > ASSERT(d); > gfn = get_gpfn_from_mfn((bank->mc_addr) >> > PAGE_SHIFT); > > @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( > global->mc_gstatus) == -1 ) > { > mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d > " - "failed\n", result->owner); > + "failed\n", bank->mc_domid); > goto vmce_failed; > } > > @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( > * For xen, it has contained the error and finished > * its own recovery job. > */ > - result->result = MCA_RECOVERED; > + *result = MCER_RECOVERED; > put_domain(d); > > return; > @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta > > static void intel_srao_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > > /* For unknown srao error code, no action required */ > + *result = MCER_CONTINUE; > + > if ( status & MCi_STATUS_VAL ) > { > switch ( status & INTEL_MCCOD_MASK ) > @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t > > static void intel_default_mce_dhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > enum intel_mce_type type; > @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( > type = intel_check_mce_type(status); > > if (type == intel_mce_fatal || type == intel_mce_ucr_srar) > - result->result = MCA_NEED_RESET; > - else if (type == intel_mce_ucr_srao) > - result->result = MCA_NO_ACTION; > + *result = MCER_RESET; > + else > + *result = MCER_CONTINUE; > } > > static const struct mca_error_handler intel_mce_dhandlers[] = { > @@ -764,7 +737,7 @@ static const struct mca_error_handler in > > static void intel_default_mce_uhandler( > struct mca_binfo *binfo, > - struct mca_handle_result *result) > + enum mce_result *result) > { > uint64_t status = binfo->mib->mc_status; > enum intel_mce_type type; > @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( > /* Panic if no handler for SRAR error */ > case intel_mce_ucr_srar: > case intel_mce_fatal: > - result->result = MCA_NEED_RESET; > + *result = MCER_RESET; > break; > default: > - result->result = MCA_NO_ACTION; > + *result = MCER_CONTINUE; > break; > } > } > diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h > --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 > +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 > @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban > void mcabanks_free(struct mca_banks *banks); > extern struct mca_banks *mca_allbanks; > > -/* Below interfaces are defined for MCA internal processing: > - * a. pre_handler will be called early in MCA ISR context, mainly > for early > - * need_reset detection for avoiding log missing. Also, it is > used to judge > - * impacted DOMAIN if possible. > - * b. mca_error_handler is actually a (error_action_index, > - * recovery_hanlder pointer) pair. The defined recovery_handler > - * performs the actual recovery operations such as page_offline, > cpu_offline > - * in softIRQ context when the per_bank MCA error matching the > corresponding > - * mca_code index. If pre_handler can''t judge the impacted domain, > - * recovery_handler must figure it out. > -*/ > - > -/* MCA error has been recovered successfully by the recovery action*/ > -#define MCA_RECOVERED (0x1 << 0) > -/* MCA error impact the specified DOMAIN in owner field below */ > -#define MCA_OWNER (0x1 << 1) > -/* MCA error can''t be recovered and need reset */ > -#define MCA_NEED_RESET (0x1 << 2) > -/* MCA error did not have any action yet */ > -#define MCA_NO_ACTION (0x1 << 3) > - > -struct mca_handle_result > -{ > - uint32_t result; > - /* Used one result & MCA_OWNER */ > - domid_t owner; > - /* Used by mca_error_handler, result & MCA_RECOVRED */ > - struct recovery_action *action; > -}; > - > /*Keep bank so that we can get staus even if mib is NULL */ > struct mca_binfo { > int bank; > @@ -163,8 +133,14 @@ struct mca_binfo { > struct cpu_user_regs *regs; > }; > > -extern void (*mca_prehandler)( struct cpu_user_regs *regs, > - struct mca_handle_result *result); > +enum mce_result > +{ > + MCER_NOERROR, > + MCER_RECOVERED, > + /* Not recoverd, but can continue */ > + MCER_CONTINUE, > + MCER_RESET, > +}; > > struct mca_error_handler > { > @@ -175,7 +151,7 @@ struct mca_error_handler > */ > int (*owned_error)(uint64_t status); > void (*recovery_handler)(struct mca_binfo *binfo, > - struct mca_handle_result *result); > + enum mce_result *result); > }; > > /* Global variables */_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2011-May-14 09:45 UTC
Re: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
I don''t seem to have this one in my queue. You should resend. -- Keir On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:> Jan and Keir, > > Any comments? > > Thanks > Jinsong > > > Liu, Jinsong wrote: >> MCA: simplify mce actoin, and some update of mem err handler for >> future SRAR extension >> >> This patch simplify mce_action(). Basically the 2 set of mce status >> flags MCA_... and MCER_... are highly functional similar. This patch >> remove the redundant middle-level flag MCA_..., and its related data >> structure and function, so that mce_action() logic is more clean and >> simpler. >> This patch also update mem err handler. intel_memerr_dhandler() will >> be commonly used by SRAO and SRAR, so for the extension of future >> SRAR case, this patch remove the default fail flag from >> intel_memerr_dhandler() outside to SRAO/SRAR level, since only >> SRAO/SRAR handler itself has the knowledge of how to handle the >> failure when mem err handler fail. >> >> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> >> >> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >> mce_dh static unsigned int __read_mostly mce_dhandler_num; >> static unsigned int __read_mostly mce_uhandler_num; >> >> -enum mce_result >> -{ >> - MCER_NOERROR, >> - MCER_RECOVERED, >> - /* Not recoverd, but can continue */ >> - MCER_CONTINUE, >> - MCER_RESET, >> -}; >> - >> /* Maybe called in MCE context, no lock, no printk */ >> static enum mce_result mce_action(struct cpu_user_regs *regs, >> mctelem_cookie_t mctc) >> { >> struct mc_info *local_mi; >> - enum mce_result ret = MCER_NOERROR; >> + enum mce_result bank_result = MCER_NOERROR; >> + enum mce_result worst_result = MCER_NOERROR; >> struct mcinfo_common *mic = NULL; >> - struct mca_handle_result mca_res; >> struct mca_binfo binfo; >> const struct mca_error_handler *handlers = mce_dhandlers; >> unsigned int i, handler_num = mce_dhandler_num; >> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >> /* Processing bank information */ >> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >> >> - for ( ; ret != MCER_RESET && mic && mic->size; >> + for ( ; bank_result != MCER_RESET && mic && mic->size; >> mic = x86_mcinfo_next(mic) ) >> { >> if (mic->type != MC_TYPE_BANK) { >> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >> } >> binfo.mib = (struct mcinfo_bank*)mic; >> binfo.bank = binfo.mib->mc_bank; >> - memset(&mca_res, 0x0f, sizeof(mca_res)); >> + bank_result = MCER_NOERROR; >> for ( i = 0; i < handler_num; i++ ) { >> if (handlers[i].owned_error(binfo.mib->mc_status)) >> { >> - handlers[i].recovery_handler(&binfo, &mca_res); >> - >> - if (mca_res.result & MCA_OWNER) >> - binfo.mib->mc_domid = mca_res.owner; >> - >> - if (mca_res.result == MCA_NEED_RESET) >> - ret = MCER_RESET; >> - else if (mca_res.result == MCA_RECOVERED) >> - { >> - if (ret < MCER_RECOVERED) >> - ret = MCER_RECOVERED; >> - } >> - else if (mca_res.result == MCA_NO_ACTION) >> - { >> - if (ret < MCER_CONTINUE) >> - ret = MCER_CONTINUE; >> - } >> + handlers[i].recovery_handler(&binfo, &bank_result); >> + if (worst_result < bank_result) >> + worst_result = bank_result; >> break; >> } >> } >> ASSERT(i != handler_num); >> } >> >> - return ret; >> + return worst_result; >> } >> >> /* >> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >> >> static void intel_memerr_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> struct mcinfo_bank *bank = binfo->mib; >> struct mcinfo_global *global = binfo->mig; >> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >> uint64_t mc_status, mc_misc; >> >> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >> - result->result = MCA_NEED_RESET; >> >> mc_status = bank->mc_status; >> mc_misc = bank->mc_misc; >> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >> !(mc_status & MCi_STATUS_MISCV) || >> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >> { >> - result->result |= MCA_NO_ACTION; >> dprintk(XENLOG_WARNING, >> "No physical address provided for memory error\n"); >> return; >> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >> >> /* This is free page */ >> if (status & PG_OFFLINE_OFFLINED) >> - result->result = MCA_RECOVERED; >> + *result = MCER_RECOVERED; >> else if (status & PG_OFFLINE_PENDING) { >> /* This page has owner */ >> if (status & PG_OFFLINE_OWNED) { >> - result->result |= MCA_OWNER; >> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >> - " by DOM %d\n", result->owner); >> - /* Fill vMCE# injection and vMCE# MSR virtualization " >> - * "related data */ >> - bank->mc_domid = result->owner; >> + " by DOM %d\n", bank->mc_domid); >> /* XXX: Cannot handle shared pages yet >> * (this should identify all domains and gfn mapping to >> * the mfn in question) */ >> - BUG_ON( result->owner == DOMID_COW ); >> - if ( result->owner != DOMID_XEN ) { >> - d = get_domain_by_id(result->owner); >> + BUG_ON( bank->mc_domid == DOMID_COW ); >> + if ( bank->mc_domid != DOMID_XEN ) { >> + d = get_domain_by_id(bank->mc_domid); >> ASSERT(d); >> gfn = get_gpfn_from_mfn((bank->mc_addr) >> >> PAGE_SHIFT); >> >> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >> global->mc_gstatus) == -1 ) >> { >> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d >> " - "failed\n", result->owner); >> + "failed\n", bank->mc_domid); >> goto vmce_failed; >> } >> >> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >> * For xen, it has contained the error and finished >> * its own recovery job. >> */ >> - result->result = MCA_RECOVERED; >> + *result = MCER_RECOVERED; >> put_domain(d); >> >> return; >> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >> >> static void intel_srao_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> >> /* For unknown srao error code, no action required */ >> + *result = MCER_CONTINUE; >> + >> if ( status & MCi_STATUS_VAL ) >> { >> switch ( status & INTEL_MCCOD_MASK ) >> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >> >> static void intel_default_mce_dhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> enum intel_mce_type type; >> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >> type = intel_check_mce_type(status); >> >> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >> - result->result = MCA_NEED_RESET; >> - else if (type == intel_mce_ucr_srao) >> - result->result = MCA_NO_ACTION; >> + *result = MCER_RESET; >> + else >> + *result = MCER_CONTINUE; >> } >> >> static const struct mca_error_handler intel_mce_dhandlers[] = { >> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >> >> static void intel_default_mce_uhandler( >> struct mca_binfo *binfo, >> - struct mca_handle_result *result) >> + enum mce_result *result) >> { >> uint64_t status = binfo->mib->mc_status; >> enum intel_mce_type type; >> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >> /* Panic if no handler for SRAR error */ >> case intel_mce_ucr_srar: >> case intel_mce_fatal: >> - result->result = MCA_NEED_RESET; >> + *result = MCER_RESET; >> break; >> default: >> - result->result = MCA_NO_ACTION; >> + *result = MCER_CONTINUE; >> break; >> } >> } >> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 >> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 >> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban >> void mcabanks_free(struct mca_banks *banks); >> extern struct mca_banks *mca_allbanks; >> >> -/* Below interfaces are defined for MCA internal processing: >> - * a. pre_handler will be called early in MCA ISR context, mainly >> for early >> - * need_reset detection for avoiding log missing. Also, it is >> used to judge >> - * impacted DOMAIN if possible. >> - * b. mca_error_handler is actually a (error_action_index, >> - * recovery_hanlder pointer) pair. The defined recovery_handler >> - * performs the actual recovery operations such as page_offline, >> cpu_offline >> - * in softIRQ context when the per_bank MCA error matching the >> corresponding >> - * mca_code index. If pre_handler can''t judge the impacted domain, >> - * recovery_handler must figure it out. >> -*/ >> - >> -/* MCA error has been recovered successfully by the recovery action*/ >> -#define MCA_RECOVERED (0x1 << 0) >> -/* MCA error impact the specified DOMAIN in owner field below */ >> -#define MCA_OWNER (0x1 << 1) >> -/* MCA error can''t be recovered and need reset */ >> -#define MCA_NEED_RESET (0x1 << 2) >> -/* MCA error did not have any action yet */ >> -#define MCA_NO_ACTION (0x1 << 3) >> - >> -struct mca_handle_result >> -{ >> - uint32_t result; >> - /* Used one result & MCA_OWNER */ >> - domid_t owner; >> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >> - struct recovery_action *action; >> -}; >> - >> /*Keep bank so that we can get staus even if mib is NULL */ >> struct mca_binfo { >> int bank; >> @@ -163,8 +133,14 @@ struct mca_binfo { >> struct cpu_user_regs *regs; >> }; >> >> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >> - struct mca_handle_result *result); >> +enum mce_result >> +{ >> + MCER_NOERROR, >> + MCER_RECOVERED, >> + /* Not recoverd, but can continue */ >> + MCER_CONTINUE, >> + MCER_RESET, >> +}; >> >> struct mca_error_handler >> { >> @@ -175,7 +151,7 @@ struct mca_error_handler >> */ >> int (*owned_error)(uint64_t status); >> void (*recovery_handler)(struct mca_binfo *binfo, >> - struct mca_handle_result *result); >> + enum mce_result *result); >> }; >> >> /* Global variables */ >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2011-May-14 10:01 UTC
Re: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
Ah no, I found it! I''ll take a look at it on Monday. -- Keir On 14/05/2011 10:45, "Keir Fraser" <keir@xen.org> wrote:> I don''t seem to have this one in my queue. You should resend. > > -- Keir > > On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > >> Jan and Keir, >> >> Any comments? >> >> Thanks >> Jinsong >> >> >> Liu, Jinsong wrote: >>> MCA: simplify mce actoin, and some update of mem err handler for >>> future SRAR extension >>> >>> This patch simplify mce_action(). Basically the 2 set of mce status >>> flags MCA_... and MCER_... are highly functional similar. This patch >>> remove the redundant middle-level flag MCA_..., and its related data >>> structure and function, so that mce_action() logic is more clean and >>> simpler. >>> This patch also update mem err handler. intel_memerr_dhandler() will >>> be commonly used by SRAO and SRAR, so for the extension of future >>> SRAR case, this patch remove the default fail flag from >>> intel_memerr_dhandler() outside to SRAO/SRAR level, since only >>> SRAO/SRAR handler itself has the knowledge of how to handle the >>> failure when mem err handler fail. >>> >>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> >>> >>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >>> mce_dh static unsigned int __read_mostly mce_dhandler_num; >>> static unsigned int __read_mostly mce_uhandler_num; >>> >>> -enum mce_result >>> -{ >>> - MCER_NOERROR, >>> - MCER_RECOVERED, >>> - /* Not recoverd, but can continue */ >>> - MCER_CONTINUE, >>> - MCER_RESET, >>> -}; >>> - >>> /* Maybe called in MCE context, no lock, no printk */ >>> static enum mce_result mce_action(struct cpu_user_regs *regs, >>> mctelem_cookie_t mctc) >>> { >>> struct mc_info *local_mi; >>> - enum mce_result ret = MCER_NOERROR; >>> + enum mce_result bank_result = MCER_NOERROR; >>> + enum mce_result worst_result = MCER_NOERROR; >>> struct mcinfo_common *mic = NULL; >>> - struct mca_handle_result mca_res; >>> struct mca_binfo binfo; >>> const struct mca_error_handler *handlers = mce_dhandlers; >>> unsigned int i, handler_num = mce_dhandler_num; >>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >>> /* Processing bank information */ >>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >>> >>> - for ( ; ret != MCER_RESET && mic && mic->size; >>> + for ( ; bank_result != MCER_RESET && mic && mic->size; >>> mic = x86_mcinfo_next(mic) ) >>> { >>> if (mic->type != MC_TYPE_BANK) { >>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >>> } >>> binfo.mib = (struct mcinfo_bank*)mic; >>> binfo.bank = binfo.mib->mc_bank; >>> - memset(&mca_res, 0x0f, sizeof(mca_res)); >>> + bank_result = MCER_NOERROR; >>> for ( i = 0; i < handler_num; i++ ) { >>> if (handlers[i].owned_error(binfo.mib->mc_status)) >>> { >>> - handlers[i].recovery_handler(&binfo, &mca_res); >>> - >>> - if (mca_res.result & MCA_OWNER) >>> - binfo.mib->mc_domid = mca_res.owner; >>> - >>> - if (mca_res.result == MCA_NEED_RESET) >>> - ret = MCER_RESET; >>> - else if (mca_res.result == MCA_RECOVERED) >>> - { >>> - if (ret < MCER_RECOVERED) >>> - ret = MCER_RECOVERED; >>> - } >>> - else if (mca_res.result == MCA_NO_ACTION) >>> - { >>> - if (ret < MCER_CONTINUE) >>> - ret = MCER_CONTINUE; >>> - } >>> + handlers[i].recovery_handler(&binfo, &bank_result); >>> + if (worst_result < bank_result) >>> + worst_result = bank_result; >>> break; >>> } >>> } >>> ASSERT(i != handler_num); >>> } >>> >>> - return ret; >>> + return worst_result; >>> } >>> >>> /* >>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >>> >>> static void intel_memerr_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> struct mcinfo_bank *bank = binfo->mib; >>> struct mcinfo_global *global = binfo->mig; >>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >>> uint64_t mc_status, mc_misc; >>> >>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >>> - result->result = MCA_NEED_RESET; >>> >>> mc_status = bank->mc_status; >>> mc_misc = bank->mc_misc; >>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >>> !(mc_status & MCi_STATUS_MISCV) || >>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >>> { >>> - result->result |= MCA_NO_ACTION; >>> dprintk(XENLOG_WARNING, >>> "No physical address provided for memory error\n"); >>> return; >>> @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >>> >>> /* This is free page */ >>> if (status & PG_OFFLINE_OFFLINED) >>> - result->result = MCA_RECOVERED; >>> + *result = MCER_RECOVERED; >>> else if (status & PG_OFFLINE_PENDING) { >>> /* This page has owner */ >>> if (status & PG_OFFLINE_OWNED) { >>> - result->result |= MCA_OWNER; >>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >>> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >>> - " by DOM %d\n", result->owner); >>> - /* Fill vMCE# injection and vMCE# MSR virtualization " >>> - * "related data */ >>> - bank->mc_domid = result->owner; >>> + " by DOM %d\n", bank->mc_domid); >>> /* XXX: Cannot handle shared pages yet >>> * (this should identify all domains and gfn mapping to >>> * the mfn in question) */ >>> - BUG_ON( result->owner == DOMID_COW ); >>> - if ( result->owner != DOMID_XEN ) { >>> - d = get_domain_by_id(result->owner); >>> + BUG_ON( bank->mc_domid == DOMID_COW ); >>> + if ( bank->mc_domid != DOMID_XEN ) { >>> + d = get_domain_by_id(bank->mc_domid); >>> ASSERT(d); >>> gfn = get_gpfn_from_mfn((bank->mc_addr) >> >>> PAGE_SHIFT); >>> >>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >>> global->mc_gstatus) == -1 ) >>> { >>> mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d >>> " - "failed\n", result->owner); >>> + "failed\n", bank->mc_domid); >>> goto vmce_failed; >>> } >>> >>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >>> * For xen, it has contained the error and finished >>> * its own recovery job. >>> */ >>> - result->result = MCA_RECOVERED; >>> + *result = MCER_RECOVERED; >>> put_domain(d); >>> >>> return; >>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >>> >>> static void intel_srao_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> >>> /* For unknown srao error code, no action required */ >>> + *result = MCER_CONTINUE; >>> + >>> if ( status & MCi_STATUS_VAL ) >>> { >>> switch ( status & INTEL_MCCOD_MASK ) >>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >>> >>> static void intel_default_mce_dhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> enum intel_mce_type type; >>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >>> type = intel_check_mce_type(status); >>> >>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >>> - result->result = MCA_NEED_RESET; >>> - else if (type == intel_mce_ucr_srao) >>> - result->result = MCA_NO_ACTION; >>> + *result = MCER_RESET; >>> + else >>> + *result = MCER_CONTINUE; >>> } >>> >>> static const struct mca_error_handler intel_mce_dhandlers[] = { >>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >>> >>> static void intel_default_mce_uhandler( >>> struct mca_binfo *binfo, >>> - struct mca_handle_result *result) >>> + enum mce_result *result) >>> { >>> uint64_t status = binfo->mib->mc_status; >>> enum intel_mce_type type; >>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >>> /* Panic if no handler for SRAR error */ >>> case intel_mce_ucr_srar: >>> case intel_mce_fatal: >>> - result->result = MCA_NEED_RESET; >>> + *result = MCER_RESET; >>> break; >>> default: >>> - result->result = MCA_NO_ACTION; >>> + *result = MCER_CONTINUE; >>> break; >>> } >>> } >>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800 >>> +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800 >>> @@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban >>> void mcabanks_free(struct mca_banks *banks); >>> extern struct mca_banks *mca_allbanks; >>> >>> -/* Below interfaces are defined for MCA internal processing: >>> - * a. pre_handler will be called early in MCA ISR context, mainly >>> for early >>> - * need_reset detection for avoiding log missing. Also, it is >>> used to judge >>> - * impacted DOMAIN if possible. >>> - * b. mca_error_handler is actually a (error_action_index, >>> - * recovery_hanlder pointer) pair. The defined recovery_handler >>> - * performs the actual recovery operations such as page_offline, >>> cpu_offline >>> - * in softIRQ context when the per_bank MCA error matching the >>> corresponding >>> - * mca_code index. If pre_handler can''t judge the impacted domain, >>> - * recovery_handler must figure it out. >>> -*/ >>> - >>> -/* MCA error has been recovered successfully by the recovery action*/ >>> -#define MCA_RECOVERED (0x1 << 0) >>> -/* MCA error impact the specified DOMAIN in owner field below */ >>> -#define MCA_OWNER (0x1 << 1) >>> -/* MCA error can''t be recovered and need reset */ >>> -#define MCA_NEED_RESET (0x1 << 2) >>> -/* MCA error did not have any action yet */ >>> -#define MCA_NO_ACTION (0x1 << 3) >>> - >>> -struct mca_handle_result >>> -{ >>> - uint32_t result; >>> - /* Used one result & MCA_OWNER */ >>> - domid_t owner; >>> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >>> - struct recovery_action *action; >>> -}; >>> - >>> /*Keep bank so that we can get staus even if mib is NULL */ >>> struct mca_binfo { >>> int bank; >>> @@ -163,8 +133,14 @@ struct mca_binfo { >>> struct cpu_user_regs *regs; >>> }; >>> >>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >>> - struct mca_handle_result *result); >>> +enum mce_result >>> +{ >>> + MCER_NOERROR, >>> + MCER_RECOVERED, >>> + /* Not recoverd, but can continue */ >>> + MCER_CONTINUE, >>> + MCER_RESET, >>> +}; >>> >>> struct mca_error_handler >>> { >>> @@ -175,7 +151,7 @@ struct mca_error_handler >>> */ >>> int (*owned_error)(uint64_t status); >>> void (*recovery_handler)(struct mca_binfo *binfo, >>> - struct mca_handle_result *result); >>> + enum mce_result *result); >>> }; >>> >>> /* Global variables */ >> > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Liu, Jinsong
2011-May-15 05:08 UTC
RE: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
OK, thanks :) Jinsong Keir Fraser wrote:> Ah no, I found it! I''ll take a look at it on Monday. > > -- Keir > > On 14/05/2011 10:45, "Keir Fraser" <keir@xen.org> wrote: > >> I don''t seem to have this one in my queue. You should resend. >> >> -- Keir >> >> On 13/05/2011 15:34, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> >>> Jan and Keir, >>> >>> Any comments? >>> >>> Thanks >>> Jinsong >>> >>> >>> Liu, Jinsong wrote: >>>> MCA: simplify mce actoin, and some update of mem err handler for >>>> future SRAR extension >>>> >>>> This patch simplify mce_action(). Basically the 2 set of mce status >>>> flags MCA_... and MCER_... are highly functional similar. This >>>> patch remove the redundant middle-level flag MCA_..., and its >>>> related data structure and function, so that mce_action() logic is >>>> more clean and simpler. This patch also update mem err handler. >>>> intel_memerr_dhandler() will be commonly used by SRAO and SRAR, so >>>> for the extension of future SRAR case, this patch remove the >>>> default fail flag from intel_memerr_dhandler() outside to >>>> SRAO/SRAR level, since only SRAO/SRAR handler itself has the >>>> knowledge of how to handle the failure when mem err handler fail. >>>> >>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> >>>> >>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c >>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 13:39:40 2011 >>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sun May 08 18:15:13 >>>> 2011 +0800 @@ -172,23 +172,14 @@ static unsigned int __read_mostly >>>> mce_dh static unsigned int __read_mostly mce_dhandler_num; >>>> static unsigned int __read_mostly mce_uhandler_num; >>>> >>>> -enum mce_result >>>> -{ >>>> - MCER_NOERROR, >>>> - MCER_RECOVERED, >>>> - /* Not recoverd, but can continue */ >>>> - MCER_CONTINUE, >>>> - MCER_RESET, >>>> -}; >>>> - >>>> /* Maybe called in MCE context, no lock, no printk */ >>>> static enum mce_result mce_action(struct cpu_user_regs *regs, >>>> mctelem_cookie_t mctc) >>>> { >>>> struct mc_info *local_mi; >>>> - enum mce_result ret = MCER_NOERROR; >>>> + enum mce_result bank_result = MCER_NOERROR; >>>> + enum mce_result worst_result = MCER_NOERROR; >>>> struct mcinfo_common *mic = NULL; >>>> - struct mca_handle_result mca_res; >>>> struct mca_binfo binfo; >>>> const struct mca_error_handler *handlers = mce_dhandlers; >>>> unsigned int i, handler_num = mce_dhandler_num; >>>> @@ -217,7 +208,7 @@ static enum mce_result mce_action(struct >>>> /* Processing bank information */ >>>> x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); >>>> >>>> - for ( ; ret != MCER_RESET && mic && mic->size; >>>> + for ( ; bank_result != MCER_RESET && mic && mic->size; >>>> mic = x86_mcinfo_next(mic) ) >>>> { >>>> if (mic->type != MC_TYPE_BANK) { >>>> @@ -225,34 +216,20 @@ static enum mce_result mce_action(struct >>>> } binfo.mib = (struct mcinfo_bank*)mic; >>>> binfo.bank = binfo.mib->mc_bank; >>>> - memset(&mca_res, 0x0f, sizeof(mca_res)); >>>> + bank_result = MCER_NOERROR; >>>> for ( i = 0; i < handler_num; i++ ) { >>>> if (handlers[i].owned_error(binfo.mib->mc_status)) >>>> { >>>> - handlers[i].recovery_handler(&binfo, &mca_res); - >>>> - if (mca_res.result & MCA_OWNER) >>>> - binfo.mib->mc_domid = mca_res.owner; - >>>> - if (mca_res.result == MCA_NEED_RESET) >>>> - ret = MCER_RESET; >>>> - else if (mca_res.result == MCA_RECOVERED) >>>> - { >>>> - if (ret < MCER_RECOVERED) >>>> - ret = MCER_RECOVERED; >>>> - } >>>> - else if (mca_res.result == MCA_NO_ACTION) >>>> - { >>>> - if (ret < MCER_CONTINUE) >>>> - ret = MCER_CONTINUE; >>>> - } >>>> + handlers[i].recovery_handler(&binfo, >>>> &bank_result); + if (worst_result < bank_result) >>>> + worst_result = bank_result; >>>> break; >>>> } >>>> } >>>> ASSERT(i != handler_num); >>>> } >>>> >>>> - return ret; >>>> + return worst_result; >>>> } >>>> >>>> /* >>>> @@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_ >>>> >>>> static void intel_memerr_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> struct mcinfo_bank *bank = binfo->mib; >>>> struct mcinfo_global *global = binfo->mig; >>>> @@ -618,7 +595,6 @@ static void intel_memerr_dhandler( >>>> uint64_t mc_status, mc_misc; >>>> >>>> mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n"); >>>> - result->result = MCA_NEED_RESET; >>>> >>>> mc_status = bank->mc_status; >>>> mc_misc = bank->mc_misc; >>>> @@ -626,7 +602,6 @@ static void intel_memerr_dhandler( >>>> !(mc_status & MCi_STATUS_MISCV) || >>>> ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) ) >>>> { - result->result |= MCA_NO_ACTION; >>>> dprintk(XENLOG_WARNING, >>>> "No physical address provided for memory error\n"); >>>> return; @@ -644,23 +619,19 @@ static void intel_memerr_dhandler( >>>> >>>> /* This is free page */ >>>> if (status & PG_OFFLINE_OFFLINED) >>>> - result->result = MCA_RECOVERED; >>>> + *result = MCER_RECOVERED; >>>> else if (status & PG_OFFLINE_PENDING) { >>>> /* This page has owner */ >>>> if (status & PG_OFFLINE_OWNED) { >>>> - result->result |= MCA_OWNER; >>>> - result->owner = status >> PG_OFFLINE_OWNER_SHIFT; >>>> + bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT; >>>> mce_printk(MCE_QUIET, "MCE: This error page is ownded" >>>> - " by DOM %d\n", result->owner); >>>> - /* Fill vMCE# injection and vMCE# MSR virtualization " >>>> - * "related data */ >>>> - bank->mc_domid = result->owner; >>>> + " by DOM %d\n", bank->mc_domid); >>>> /* XXX: Cannot handle shared pages yet >>>> * (this should identify all domains and gfn mapping >>>> to >>>> * the mfn in question) */ >>>> - BUG_ON( result->owner == DOMID_COW ); >>>> - if ( result->owner != DOMID_XEN ) { >>>> - d = get_domain_by_id(result->owner); >>>> + BUG_ON( bank->mc_domid == DOMID_COW ); >>>> + if ( bank->mc_domid != DOMID_XEN ) { >>>> + d = get_domain_by_id(bank->mc_domid); >>>> ASSERT(d); gfn >>>> get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT); >>>> >>>> @@ -683,7 +654,7 @@ static void intel_memerr_dhandler( >>>> global->mc_gstatus) == -1 ) >>>> { >>>> mce_printk(MCE_QUIET, "Fill vMCE# data for >>>> DOM%d " - "failed\n", result->owner); >>>> + "failed\n", bank->mc_domid); >>>> goto vmce_failed; >>>> } >>>> >>>> @@ -699,7 +670,7 @@ static void intel_memerr_dhandler( >>>> * For xen, it has contained the error and >>>> finished >>>> * its own recovery job. >>>> */ >>>> - result->result = MCA_RECOVERED; >>>> + *result = MCER_RECOVERED; >>>> put_domain(d); >>>> >>>> return; >>>> @@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta >>>> >>>> static void intel_srao_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> >>>> /* For unknown srao error code, no action required */ + >>>> *result = MCER_CONTINUE; + >>>> if ( status & MCi_STATUS_VAL ) >>>> { >>>> switch ( status & INTEL_MCCOD_MASK ) >>>> @@ -744,7 +717,7 @@ static int intel_default_check(uint64_t >>>> >>>> static void intel_default_mce_dhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> enum intel_mce_type type; >>>> @@ -752,9 +725,9 @@ static void intel_default_mce_dhandler( >>>> type = intel_check_mce_type(status); >>>> >>>> if (type == intel_mce_fatal || type == intel_mce_ucr_srar) >>>> - result->result = MCA_NEED_RESET; >>>> - else if (type == intel_mce_ucr_srao) >>>> - result->result = MCA_NO_ACTION; >>>> + *result = MCER_RESET; >>>> + else >>>> + *result = MCER_CONTINUE; >>>> } >>>> >>>> static const struct mca_error_handler intel_mce_dhandlers[] = { >>>> @@ -764,7 +737,7 @@ static const struct mca_error_handler in >>>> >>>> static void intel_default_mce_uhandler( >>>> struct mca_binfo *binfo, >>>> - struct mca_handle_result *result) >>>> + enum mce_result *result) >>>> { >>>> uint64_t status = binfo->mib->mc_status; >>>> enum intel_mce_type type; >>>> @@ -776,10 +749,10 @@ static void intel_default_mce_uhandler( >>>> /* Panic if no handler for SRAR error */ >>>> case intel_mce_ucr_srar: >>>> case intel_mce_fatal: >>>> - result->result = MCA_NEED_RESET; >>>> + *result = MCER_RESET; >>>> break; >>>> default: >>>> - result->result = MCA_NO_ACTION; >>>> + *result = MCER_CONTINUE; >>>> break; >>>> } >>>> } >>>> diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h >>>> --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 >>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 >>>> 2011 +0800 @@ -124,36 +124,6 @@ void mcabanks_free(struct >>>> mca_banks *ban void mcabanks_free(struct mca_banks *banks); >>>> extern struct mca_banks *mca_allbanks; >>>> >>>> -/* Below interfaces are defined for MCA internal processing: >>>> - * a. pre_handler will be called early in MCA ISR context, mainly >>>> for early >>>> - * need_reset detection for avoiding log missing. Also, it is >>>> used to judge >>>> - * impacted DOMAIN if possible. >>>> - * b. mca_error_handler is actually a (error_action_index, >>>> - * recovery_hanlder pointer) pair. The defined recovery_handler >>>> - * performs the actual recovery operations such as >>>> page_offline, cpu_offline >>>> - * in softIRQ context when the per_bank MCA error matching the >>>> corresponding >>>> - * mca_code index. If pre_handler can''t judge the impacted >>>> domain, >>>> - * recovery_handler must figure it out. >>>> -*/ >>>> - >>>> -/* MCA error has been recovered successfully by the recovery >>>> action*/ >>>> -#define MCA_RECOVERED (0x1 << 0) >>>> -/* MCA error impact the specified DOMAIN in owner field below */ >>>> -#define MCA_OWNER (0x1 << 1) >>>> -/* MCA error can''t be recovered and need reset */ >>>> -#define MCA_NEED_RESET (0x1 << 2) >>>> -/* MCA error did not have any action yet */ >>>> -#define MCA_NO_ACTION (0x1 << 3) >>>> - >>>> -struct mca_handle_result >>>> -{ >>>> - uint32_t result; >>>> - /* Used one result & MCA_OWNER */ >>>> - domid_t owner; >>>> - /* Used by mca_error_handler, result & MCA_RECOVRED */ >>>> - struct recovery_action *action; >>>> -}; >>>> - >>>> /*Keep bank so that we can get staus even if mib is NULL */ >>>> struct mca_binfo { int bank; >>>> @@ -163,8 +133,14 @@ struct mca_binfo { >>>> struct cpu_user_regs *regs; >>>> }; >>>> >>>> -extern void (*mca_prehandler)( struct cpu_user_regs *regs, >>>> - struct mca_handle_result *result); +enum >>>> mce_result +{ >>>> + MCER_NOERROR, >>>> + MCER_RECOVERED, >>>> + /* Not recoverd, but can continue */ >>>> + MCER_CONTINUE, >>>> + MCER_RESET, >>>> +}; >>>> >>>> struct mca_error_handler >>>> { >>>> @@ -175,7 +151,7 @@ struct mca_error_handler >>>> */ >>>> int (*owned_error)(uint64_t status); >>>> void (*recovery_handler)(struct mca_binfo *binfo, >>>> - struct mca_handle_result *result); >>>> + enum mce_result *result); >>>> }; >>>> >>>> /* Global variables */_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel