X86 MCE: Add SRAR handler Currently Intel SDM add 2 kinds of MCE SRAR errors: 1). Data Load error, error code = 0x134 2). Instruction Fetch error, error code = 0x150 This patch add handler to these new SRAR errors. It based on existed mce infrastructure, add code to handle SRAR specific error. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> diff -r 8d6edc3d26d2 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sat Aug 20 23:53:02 2011 +0800 @@ -37,6 +37,14 @@ static int __read_mostly nr_intel_ext_ms */ #define INTEL_SRAO_MEM_SCRUB 0xC0 ... 0xCF #define INTEL_SRAO_L3_EWB 0x17A + +/* + * Currently Intel SDM define 2 kinds of srar errors: + * 1). Data Load error, error code = 0x134 + * 2). Instruction Fetch error, error code = 0x150 + */ +#define INTEL_SRAR_DATA_LOAD 0x134 +#define INTEL_SRAR_INSTR_FETCH 0x150 /* Thermal Hanlding */ #ifdef CONFIG_X86_MCE_THERMAL @@ -255,7 +263,7 @@ static enum mce_result mce_action(struct for ( i = 0; i < handler_num; i++ ) { if (handlers[i].owned_error(binfo.mib->mc_status)) { - handlers[i].recovery_handler(&binfo, &bank_result); + handlers[i].recovery_handler(&binfo, &bank_result, regs); if (worst_result < bank_result) worst_result = bank_result; break; @@ -621,7 +629,8 @@ struct mcinfo_recovery *mci_add_pageoff_ static void intel_memerr_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { struct mcinfo_bank *bank = binfo->mib; struct mcinfo_global *global = binfo->mig; @@ -718,6 +727,32 @@ vmce_failed: } } +static int intel_srar_check(uint64_t status) +{ + return ( intel_check_mce_type(status) == intel_mce_ucr_srar ); +} + +static void intel_srar_dhandler( + struct mca_binfo *binfo, + enum mce_result *result, + struct cpu_user_regs *regs) +{ + uint64_t status = binfo->mib->mc_status; + + /* For unknown srar error code, reset system */ + *result = MCER_RESET; + + switch ( status & INTEL_MCCOD_MASK ) + { + case INTEL_SRAR_DATA_LOAD: + case INTEL_SRAR_INSTR_FETCH: + intel_memerr_dhandler(binfo, result, regs); + break; + default: + break; + } +} + static int intel_srao_check(uint64_t status) { return ( intel_check_mce_type(status) == intel_mce_ucr_srao ); @@ -725,7 +760,8 @@ static int intel_srao_check(uint64_t sta static void intel_srao_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { uint64_t status = binfo->mib->mc_status; @@ -738,7 +774,7 @@ static void intel_srao_dhandler( { case INTEL_SRAO_MEM_SCRUB: case INTEL_SRAO_L3_EWB: - intel_memerr_dhandler(binfo, result); + intel_memerr_dhandler(binfo, result, regs); break; default: break; @@ -753,14 +789,15 @@ static int intel_default_check(uint64_t static void intel_default_mce_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs * regs) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; type = intel_check_mce_type(status); - if (type == intel_mce_fatal || type == intel_mce_ucr_srar) + if (type == intel_mce_fatal) *result = MCER_RESET; else *result = MCER_CONTINUE; @@ -768,12 +805,14 @@ static void intel_default_mce_dhandler( static const struct mca_error_handler intel_mce_dhandlers[] = { {intel_srao_check, intel_srao_dhandler}, + {intel_srar_check, intel_srar_dhandler}, {intel_default_check, intel_default_mce_dhandler} }; static void intel_default_mce_uhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( switch (type) { - /* Panic if no handler for SRAR error */ case intel_mce_ucr_srar: + if ( !guest_mode(regs) ) + *result = MCER_RESET; + else + *result = MCER_CONTINUE; + break; case intel_mce_fatal: *result = MCER_RESET; break; @@ -958,10 +1001,8 @@ static int intel_recoverable_scan(u64 st /* SRAR error */ else if ( ser_support && !(status & MCi_STATUS_OVER) && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S) - && (status & MCi_STATUS_AR) ) { - mce_printk(MCE_VERBOSE, "MCE: No SRAR error defined currently.\n"); - return 0; - } + && (status & MCi_STATUS_AR) && (status & MCi_STATUS_EN) ) + return 1; /* SRAO error */ else if (ser_support && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) diff -r 8d6edc3d26d2 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sat Aug 13 10:14:58 2011 +0100 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sat Aug 20 23:53:02 2011 +0800 @@ -151,7 +151,7 @@ struct mca_error_handler */ int (*owned_error)(uint64_t status); void (*recovery_handler)(struct mca_binfo *binfo, - enum mce_result *result); + enum mce_result *result, struct cpu_user_regs *regs); }; /* Global variables */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 29.09.11 at 17:20, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( > > switch (type) > { > - /* Panic if no handler for SRAR error */ > case intel_mce_ucr_srar: > + if ( !guest_mode(regs) ) > + *result = MCER_RESET; > + else > + *result = MCER_CONTINUE; > + break; > case intel_mce_fatal: > *result = MCER_RESET; > break;Using the stack based registers for any decision in an MCE handler seems bogus to me - without knowing that the error occurred synchronously, the result is meaningless. Unfortunately I wasn''t able to spot - throughout your patch - what SRAR actually stands for, and the manual is no help in that respect either. It does state, however, that EIPV in three of four cases would be clear for these, so using the registers on stack is likely wrong here. This made me look at the current source, and there I see in mce_urgent_action() if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; which I think should say ... _EIPV and use || instead. Thoughts? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: Thursday, September 29, 2011 11:42 PM > To: Liu, Jinsong > Cc: Keir Fraser; Jiang, Yunhong; xen-devel@lists.xensource.com > Subject: Re: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler > > >>> On 29.09.11 at 17:20, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > > @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( > > > > switch (type) > > { > > - /* Panic if no handler for SRAR error */ > > case intel_mce_ucr_srar: > > + if ( !guest_mode(regs) ) > > + *result = MCER_RESET; > > + else > > + *result = MCER_CONTINUE; > > + break; > > case intel_mce_fatal: > > *result = MCER_RESET; > > break; > > Using the stack based registers for any decision in an MCE handler > seems bogus to me - without knowing that the error occurred > synchronously, the result is meaningless. Unfortunately I wasn''tI think the usage of stack in MCE handler should be case by case. For example, it''s ok to use it at data load instruction since the EIPV is valid for it. For the instruction load, not that sure and I will check it internally. But agree that we should not do this depends on the error type (like SRAR), but should depends on the specific error code.> able to spot - throughout your patch - what SRAR actually stands > for, and the manual is no help in that respect either. It does state, > however, that EIPV in three of four cases would be clear for these, > so using the registers on stack is likely wrong here. > > This made me look at the current source, and there I see in > mce_urgent_action() > > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > return -1; > > which I think should say ... _EIPV and use || instead. Thoughts?I think this code means, if the error happens in hypervisor mode (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be restarted, we have to panic. Thanks --jyh> > Jan_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> >> -----Original Message----- >> From: Jan Beulich [mailto:JBeulich@suse.com] >> Sent: Thursday, September 29, 2011 11:42 PM >> To: Liu, Jinsong >> Cc: Keir Fraser; Jiang, Yunhong; xen-devel@lists.xensource.com >> Subject: Re: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >> >> >>> On 29.09.11 at 17:20, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> > @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( >> > >> > switch (type) >> > { >> > - /* Panic if no handler for SRAR error */ >> > case intel_mce_ucr_srar: >> > + if ( !guest_mode(regs) ) >> > + *result = MCER_RESET; >> > + else >> > + *result = MCER_CONTINUE; >> > + break; >> > case intel_mce_fatal: >> > *result = MCER_RESET; >> > break; >> >> Using the stack based registers for any decision in an MCE handler >> seems bogus to me - without knowing that the error occurred >> synchronously, the result is meaningless. Unfortunately I wasn''t > > I think the usage of stack in MCE handler should be case by case. > For example, it''s ok to use it at data load instruction since the EIPV is > valid for it.According to the table in the manual, this is only the case on the local thread.> For the instruction load, not that sure and I will check it internally. > > But agree that we should not do this depends on the error type (like SRAR), > but should depends on the specific error code. > >> able to spot - throughout your patch - what SRAR actually stands >> for, and the manual is no help in that respect either. It does state, >> however, that EIPV in three of four cases would be clear for these, >> so using the registers on stack is likely wrong here. >> >> This made me look at the current source, and there I see in >> mce_urgent_action() >> >> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >> return -1; >> >> which I think should say ... _EIPV and use || instead. Thoughts? > > I think this code means, if the error happens in hypervisor mode (i.e. > !guest_mode()), and RIPV indicate the RIP in stack can''t be restarted, we > have to panic.Then the guest_mode() check still lacks an extra check of EIPV, like if ( !(gstatus & MCG_STATUS_RIPV) && (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) return -1; Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>> wrote: > >> >>> -----Original Message----- >>> From: Jan Beulich [mailto:JBeulich@suse.com] >>> Sent: Thursday, September 29, 2011 11:42 PM >>> To: Liu, Jinsong >>> Cc: Keir Fraser; Jiang, Yunhong; xen-devel@lists.xensource.com >>> Subject: Re: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >>> >>>>>> On 29.09.11 at 17:20, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( >>>> >>>> switch (type) >>>> { >>>> - /* Panic if no handler for SRAR error */ >>>> case intel_mce_ucr_srar: >>>> + if ( !guest_mode(regs) ) >>>> + *result = MCER_RESET; >>>> + else >>>> + *result = MCER_CONTINUE; >>>> + break; >>>> case intel_mce_fatal: >>>> *result = MCER_RESET; >>>> break; >>> >>> Using the stack based registers for any decision in an MCE handler >>> seems bogus to me - without knowing that the error occurred >>> synchronously, the result is meaningless. Unfortunately I wasn''t >> >> I think the usage of stack in MCE handler should be case by case. >> For example, it''s ok to use it at data load instruction since the >> EIPV is valid for it. > > According to the table in the manual, this is only the case on the > local thread. >OK, we can remove srar check at mce isr ''uhandler''. so at mce isr, the check if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; is a total insurance for the error which triggered at hypervisor while cannot restart from RIP.>> For the instruction load, not that sure and I will check it >> internally. >> >> But agree that we should not do this depends on the error type (like >> SRAR), but should depends on the specific error code. >> >>> able to spot - throughout your patch - what SRAR actually stands >>> for, and the manual is no help in that respect either. It does >>> state, however, that EIPV in three of four cases would be clear for >>> these, so using the registers on stack is likely wrong here. >>> >>> This made me look at the current source, and there I see in >>> mce_urgent_action() >>> >>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>> return -1; >>> >>> which I think should say ... _EIPV and use || instead. Thoughts? >> >> I think this code means, if the error happens in hypervisor mode >> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >> restarted, we have to panic. > > Then the guest_mode() check still lacks an extra check of EIPV, like > > if ( !(gstatus & MCG_STATUS_RIPV) && > (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) > return -1; >That would be overkilled. Considering instruction fetch error occur at guest context, hypervisor deliver to guest to handle the error is perfer, not panic all system. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 30.09.11 at 09:44, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>> wrote: >>>> This made me look at the current source, and there I see in >>>> mce_urgent_action() >>>> >>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>> return -1; >>>> >>>> which I think should say ... _EIPV and use || instead. Thoughts? >>> >>> I think this code means, if the error happens in hypervisor mode >>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>> restarted, we have to panic. >> >> Then the guest_mode() check still lacks an extra check of EIPV, like >> >> if ( !(gstatus & MCG_STATUS_RIPV) && >> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >> return -1; >> > > That would be overkilled. > Considering instruction fetch error occur at guest context, hypervisor > deliver to guest to handle the error is perfer, not panic all system.Even if it was hypervisor code that got prefetched while still executing guest code (which ought to be possible at least across a syscall/sysenter instruction)? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 30.09.11 at 09:44, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>>> wrote: >>>>> This made me look at the current source, and there I see in >>>>> mce_urgent_action() >>>>> >>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>> return -1; >>>>> >>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>> >>>> I think this code means, if the error happens in hypervisor mode >>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>> restarted, we have to panic. >>> >>> Then the guest_mode() check still lacks an extra check of EIPV, like >>> >>> if ( !(gstatus & MCG_STATUS_RIPV) && >>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>> return -1; >>> >> >> That would be overkilled. >> Considering instruction fetch error occur at guest context, >> hypervisor deliver to guest to handle the error is perfer, not panic >> all system. > > Even if it was hypervisor code that got prefetched while still > executing guest code (which ought to be possible at least > across a syscall/sysenter instruction)? >Executing guest code will not satisfy the check if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; so it would not panic system. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 30.09.11 at 09:44, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> Jan Beulich wrote: >>>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>>>> wrote: >>>>>> This made me look at the current source, and there I see in >>>>>> mce_urgent_action() >>>>>> >>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>>> return -1; >>>>>> >>>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>>> >>>>> I think this code means, if the error happens in hypervisor mode >>>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>>> restarted, we have to panic. >>>> >>>> Then the guest_mode() check still lacks an extra check of EIPV, like >>>> >>>> if ( !(gstatus & MCG_STATUS_RIPV) && >>>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>>> return -1; >>>> >>> >>> That would be overkilled. >>> Considering instruction fetch error occur at guest context, >>> hypervisor deliver to guest to handle the error is perfer, not panic >>> all system. >> >> Even if it was hypervisor code that got prefetched while still >> executing guest code (which ought to be possible at least >> across a syscall/sysenter instruction)? >> > > Executing guest code will not satisfy the check > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > return -1; > so it would not panic system.Exactly. But it should when the prefetch was to hypervisor code. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 30.09.11 at 09:44, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> Jan Beulich wrote: >>>>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" >>>>>>>> <yunhong.jiang@intel.com> wrote: >>>>>>> This made me look at the current source, and there I see in >>>>>>> mce_urgent_action() >>>>>>> >>>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>>>> return -1; >>>>>>> >>>>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>>>> >>>>>> I think this code means, if the error happens in hypervisor mode >>>>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>>>> restarted, we have to panic. >>>>> >>>>> Then the guest_mode() check still lacks an extra check of EIPV, >>>>> like >>>>> >>>>> if ( !(gstatus & MCG_STATUS_RIPV) && >>>>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>>>> return -1; >>>>> >>>> >>>> That would be overkilled. >>>> Considering instruction fetch error occur at guest context, >>>> hypervisor deliver to guest to handle the error is perfer, not >>>> panic all system. >>> >>> Even if it was hypervisor code that got prefetched while still >>> executing guest code (which ought to be possible at least >>> across a syscall/sysenter instruction)? >>> >> >> Executing guest code will not satisfy the check >> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; >> so it would not panic system. > > Exactly. But it should when the prefetch was to hypervisor code. >Wouldn''t processor refresh instruction prefetch queue under such case? Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> Executing guest code will not satisfy the check >>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; >>> so it would not panic system. >> >> Exactly. But it should when the prefetch was to hypervisor code. >> > > Wouldn''t processor refresh instruction prefetch queue under such case?That''s a question that you are better positioned to answer than me. But the SRAR errors being a sub-category of uncorrected errors I would think it can''t be that simple. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> Executing guest code will not satisfy the check >>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; >>>> so it would not panic system. >>> >>> Exactly. But it should when the prefetch was to hypervisor code. >>> >> >> Wouldn''t processor refresh instruction prefetch queue under such >> case? > > That''s a question that you are better positioned to answer than me. > But the SRAR errors being a sub-category of uncorrected errors I > would think it can''t be that simple. >Hmm, I will check this question internally first. BTW, we would have 7 days holiday (1/10 ~ 7/10), so email reply maybe some slow. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Liu, Jinsong wrote:> Jan Beulich wrote: >>>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>> wrote: >>> Jan Beulich wrote: >>>>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> Executing guest code will not satisfy the check >>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return >>>>> -1; so it would not panic system. >>>> >>>> Exactly. But it should when the prefetch was to hypervisor code. >>>> >>> >>> Wouldn''t processor refresh instruction prefetch queue under such >>> case? >> >> That''s a question that you are better positioned to answer than me. >> But the SRAR errors being a sub-category of uncorrected errors I >> would think it can''t be that simple. >> > > Hmm, I will check this question internally first. > BTW, we would have 7 days holiday (1/10 ~ 7/10), so email reply maybe > some slow. > > Thanks, > JinsongAh, just think our talking context: the prefetched instruction would have been flushed since we now at mce exception context. So I think no need to overkill here, just let guest handle it --> who own, who take. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Liu, Jinsong wrote:> Liu, Jinsong wrote: >> Jan Beulich wrote: >>>>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> Jan Beulich wrote: >>>>>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>> wrote: >>>>>> Executing guest code will not satisfy the check >>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return >>>>>> -1; so it would not panic system. >>>>> >>>>> Exactly. But it should when the prefetch was to hypervisor code. >>>>> >>>> >>>> Wouldn''t processor refresh instruction prefetch queue under such >>>> case? >>> >>> That''s a question that you are better positioned to answer than me. >>> But the SRAR errors being a sub-category of uncorrected errors I >>> would think it can''t be that simple. >>> >> >> Hmm, I will check this question internally first. >> BTW, we would have 7 days holiday (1/10 ~ 7/10), so email reply >> maybe some slow. >> >> Thanks, >> Jinsong > > Ah, just think our talking context: the prefetched instruction would > have been flushed since we now at mce exception context. So I think > no need to overkill here, just let guest handle it --> who own, who > take. > > Thanks, > JinsongJan, Do you think following is OK? if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 06.10.11 at 20:40, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Liu, Jinsong wrote: >> Liu, Jinsong wrote: >>> Jan Beulich wrote: >>>>>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> Jan Beulich wrote: >>>>>>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>>> wrote: >>>>>>> Executing guest code will not satisfy the check >>>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return >>>>>>> -1; so it would not panic system. >>>>>> >>>>>> Exactly. But it should when the prefetch was to hypervisor code. >>>>>> >>>>> >>>>> Wouldn''t processor refresh instruction prefetch queue under such >>>>> case? >>>> >>>> That''s a question that you are better positioned to answer than me. >>>> But the SRAR errors being a sub-category of uncorrected errors I >>>> would think it can''t be that simple. >>>> >>> >>> Hmm, I will check this question internally first. >>> BTW, we would have 7 days holiday (1/10 ~ 7/10), so email reply >>> maybe some slow. >>> >>> Thanks, >>> Jinsong >> >> Ah, just think our talking context: the prefetched instruction would >> have been flushed since we now at mce exception context. So I think >> no need to overkill here, just let guest handle it --> who own, who >> take. >> >> Thanks, >> Jinsong > > Jan, > > Do you think following is OK? > > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > return -1;That''s what we have currently, and as I said earlier I don''t think using the result of guest_mode() for any decision is valid when the EIPV bit is clear. Jan> Thanks, > Jinsong_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: Friday, September 30, 2011 3:25 PM > To: Liu, Jinsong; Jiang, Yunhong > Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com > Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler > > >>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com>wrote:> > > > >> -----Original Message----- > >> From: Jan Beulich [mailto:JBeulich@suse.com] > >> Sent: Thursday, September 29, 2011 11:42 PM > >> To: Liu, Jinsong > >> Cc: Keir Fraser; Jiang, Yunhong; xen-devel@lists.xensource.com > >> Subject: Re: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler > >> > >> >>> On 29.09.11 at 17:20, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > >> > @@ -782,8 +821,12 @@ static void intel_default_mce_uhandler( > >> > > >> > switch (type) > >> > { > >> > - /* Panic if no handler for SRAR error */ > >> > case intel_mce_ucr_srar: > >> > + if ( !guest_mode(regs) ) > >> > + *result = MCER_RESET; > >> > + else > >> > + *result = MCER_CONTINUE; > >> > + break; > >> > case intel_mce_fatal: > >> > *result = MCER_RESET; > >> > break; > >> > >> Using the stack based registers for any decision in an MCE handler > >> seems bogus to me - without knowing that the error occurred > >> synchronously, the result is meaningless. Unfortunately I wasn''t > > > > I think the usage of stack in MCE handler should be case by case. > > For example, it''s ok to use it at data load instruction since the EIPVis> > valid for it. > > According to the table in the manual, this is only the case on the local > thread.Sorry for long delay response because of china holiday . That''s about the so-called "affected processors". For the un-affected processor, it should not be impact. But current patch does not handle the share bank situation correctly and need some changes. According to the SDM, only affected processor should clear the bank, un-affected process should not clear it, current patch does not handle such situation.> > > For the instruction load, not that sure and I will check it internally. > > > > But agree that we should not do this depends on the error type (likeSRAR),> > but should depends on the specific error code. > > > >> able to spot - throughout your patch - what SRAR actually stands > >> for, and the manual is no help in that respect either. It does state, > >> however, that EIPV in three of four cases would be clear for these, > >> so using the registers on stack is likely wrong here. > >> > >> This made me look at the current source, and there I see in > >> mce_urgent_action() > >> > >> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > >> return -1; > >> > >> which I think should say ... _EIPV and use || instead. Thoughts? > > > > I think this code means, if the error happens in hypervisor mode (i.e. > > !guest_mode()), and RIPV indicate the RIP in stack can''t be restarted,we> > have to panic. > > Then the guest_mode() check still lacks an extra check of EIPV, like > > if ( !(gstatus & MCG_STATUS_RIPV) && > (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) > return -1; >The RIPV is not related to the EIPV. RIPV means the context saved in the stack can''t be restarted anymore. According to the SDM, RIPV means "execution can be restarted reliably at the instruction pointed to by the instruction pointer pushed on the stack". It''s not about error happened synchronously or asynchronously. The point is, if the program is running in hypervisor context, and RIPV tells us that the program can''t be restarted, we can''t do anything but panic, because we can''t switch context while we are in xen. So this code have nothing to do with EIPV. If Xen is pre-emptible and can be switched to a new context, possibly sometimes (like in shadow handling) we can switch to a new context and don''t need such detection, but at least currently we don''t want to involve so complex handling. The boundary condition of syscall/sysret is something interesting. If a Instruction Fetch Error happens in hypervisor''s syscall entry point, it will cause system panic because in the end, we should find it''s a xen page broken by checking the MCi_ADDR MSR. And if guest''s sysret rip is broken, it will overkill xen hypervisor for a guest error. But possibly the handling result is acceptable considering the low possibility and there is no potential data pollution. Thanks --jyh> Jan_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: Friday, October 07, 2011 3:43 PM > To: Liu, Jinsong; Jiang, Yunhong > Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com > Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler > > >>> On 06.10.11 at 20:40, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > > Liu, Jinsong wrote: > >> Liu, Jinsong wrote: > >>> Jan Beulich wrote: > >>>>>>> On 30.09.11 at 11:42, "Liu, Jinsong" <jinsong.liu@intel.com> > >>>>>>> wrote: > >>>>> Jan Beulich wrote: > >>>>>>>>> On 30.09.11 at 10:21, "Liu, Jinsong" <jinsong.liu@intel.com> > >>>>>>>>> wrote: > >>>>>>> Executing guest code will not satisfy the check > >>>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > return > >>>>>>> -1; so it would not panic system. > >>>>>> > >>>>>> Exactly. But it should when the prefetch was to hypervisor code. > >>>>>> > >>>>> > >>>>> Wouldn''t processor refresh instruction prefetch queue under such > >>>>> case? > >>>> > >>>> That''s a question that you are better positioned to answer than me. > >>>> But the SRAR errors being a sub-category of uncorrected errors I > >>>> would think it can''t be that simple. > >>>> > >>> > >>> Hmm, I will check this question internally first. > >>> BTW, we would have 7 days holiday (1/10 ~ 7/10), so email reply > >>> maybe some slow. > >>> > >>> Thanks, > >>> Jinsong > >> > >> Ah, just think our talking context: the prefetched instruction would > >> have been flushed since we now at mce exception context. So I think > >> no need to overkill here, just let guest handle it --> who own, who > >> take. > >> > >> Thanks, > >> Jinsong > > > > Jan, > > > > Do you think following is OK? > > > > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > > return -1; > > That''s what we have currently, and as I said earlier I don''t think using > the result of guest_mode() for any decision is valid when the EIPV bit > is clear.guest_mode() should be ok to be used together with RIPV, since RIPV is talking about the RIP in the stack. Thanks --jyh> > Jan > > > Thanks, > > Jinsong > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 08.10.11 at 10:29, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> >> -----Original Message----- >> From: Jan Beulich [mailto:JBeulich@suse.com] >> Sent: Friday, September 30, 2011 3:25 PM >> To: Liu, Jinsong; Jiang, Yunhong >> Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com >> Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >> >> >>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> > wrote: >> >> > >> >> -----Original Message----- >> >> From: Jan Beulich [mailto:JBeulich@suse.com] >> >> This made me look at the current source, and there I see in >> >> mce_urgent_action() >> >> >> >> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >> >> return -1; >> >> >> >> which I think should say ... _EIPV and use || instead. Thoughts? >> > >> > I think this code means, if the error happens in hypervisor mode (i.e. >> > !guest_mode()), and RIPV indicate the RIP in stack can''t be restarted, > we >> > have to panic. >> >> Then the guest_mode() check still lacks an extra check of EIPV, like >> >> if ( !(gstatus & MCG_STATUS_RIPV) && >> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >> return -1; >> > > The RIPV is not related to the EIPV. RIPV means the context saved in the > stack can''t be restarted anymore. According to the SDM, RIPV means > "execution can be restarted reliably at the instruction pointed to by the > instruction pointer pushed on the stack". It''s not about error happened > synchronously or asynchronously. The point is, if the program is running in > hypervisor context, and RIPV tells us that the program can''t be restarted, > we can''t do anything but panic, because we can''t switch context while we are > in xen. So this code have nothing to do with EIPV.I continue to disagree (including the statement in your other response): RIPV only tells us whether we can resume, not in which context the error occurred. EIPV tells us whether, by looking at the saved registers, we can determine the context that the error occurred in. Since with !RIPV we have to determine in what context the error occurred in order to decide whether to panic or just kill a guest, we can''t ignore EIPV (and if it''s not set we have to assume the worst case, since even if the registers indicate guest mode the error may have occurred in hypervisor context or accessing hypervisor structures [consider e.g. a data load error during a GDT access]). Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 08.10.11 at 10:29, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>> wrote: > >> >>> -----Original Message----- >>> From: Jan Beulich [mailto:JBeulich@suse.com] >>> Sent: Friday, September 30, 2011 3:25 PM >>> To: Liu, Jinsong; Jiang, Yunhong >>> Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com >>> Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >>> >>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>>> wrote: >>> >>>> >>>>> -----Original Message----- >>>>> From: Jan Beulich [mailto:JBeulich@suse.com] >>>>> This made me look at the current source, and there I see in >>>>> mce_urgent_action() >>>>> >>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>> return -1; >>>>> >>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>> >>>> I think this code means, if the error happens in hypervisor mode >>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>> restarted, we have to panic. >>> >>> Then the guest_mode() check still lacks an extra check of EIPV, like >>> >>> if ( !(gstatus & MCG_STATUS_RIPV) && >>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>> return -1; >>> >> >> The RIPV is not related to the EIPV. RIPV means the context saved in >> the stack can''t be restarted anymore. According to the SDM, RIPV >> means "execution can be restarted reliably at the instruction >> pointed to by the instruction pointer pushed on the stack". It''s not >> about error happened synchronously or asynchronously. The point is, >> if the program is running in hypervisor context, and RIPV tells us >> that the program can''t be restarted, we can''t do anything but panic, >> because we can''t switch context while we are in xen. So this code >> have nothing to do with EIPV. > > I continue to disagree (including the statement in your other > response): RIPV only tells us whether we can resume, not in which > context the error occurred. EIPV tells us whether, by looking at the > saved registers, we can determine the context that the error occurred > in. Since with !RIPV > we have to determine in what context the error occurred in order to > decide whether to panic or just kill a guest, we can''t ignore EIPV > (and if it''s not set we have to assume the worst case, since even if > the registers indicate guest mode the error may have occurred in > hypervisor context or accessing hypervisor structures [consider e.g. a > data load error during a GDT access]). > > JanYes, I agree EIPV=0 may indicate async error, but I think your solution *overkilled* most cases (i.e. the real guest instruction fetch error). Our idea is, * xen mce would flush prefetched instruction so we can delay handle it until if real need; * a h/w error will not disappear, but if it was not being *consumed*, it''s OK for system keep going (like SRAO error which do not need s/w handle immediately); Suppose an async instruction fetch error (RIVP=EIVP=0), triggered at guest context but instruction prefetch hypervisor context. The scenario is, * at xen mce, the prefetched instruction has been flushed. xen mce handler needn''t panic, instead it mark the page as broken page, then trigger vmce to guest; * guest may kill app, kernel thread, guest itself, or whatever; The error is still an error, w/ 2 possibilities in the future: 1. it may not be consumed as an SRAR error, system keep going, h/w mechanism may detect a SRAO error (i.e. memroy scrub) at some time point and handled then; 2. it may be consumed at some time point and a SRAR error triggered again. At this time, 1). if srar occurred at hypervisor context, xen will panic. or, 2). if srar occurred at guest context, xen kill the guest as a malicious one (as what the 2nd patch do), and move the page to broken page list; Considering the rare possibility of the above case, I think it''s acceptable to handle it in this way. Thoughts? Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 11.10.11 at 10:15, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 08.10.11 at 10:29, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>> wrote: >> >>> >>>> -----Original Message----- >>>> From: Jan Beulich [mailto:JBeulich@suse.com] >>>> Sent: Friday, September 30, 2011 3:25 PM >>>> To: Liu, Jinsong; Jiang, Yunhong >>>> Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com >>>> Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >>>> >>>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>>>> wrote: >>>> >>>>> >>>>>> -----Original Message----- >>>>>> From: Jan Beulich [mailto:JBeulich@suse.com] >>>>>> This made me look at the current source, and there I see in >>>>>> mce_urgent_action() >>>>>> >>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>>> return -1; >>>>>> >>>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>>> >>>>> I think this code means, if the error happens in hypervisor mode >>>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>>> restarted, we have to panic. >>>> >>>> Then the guest_mode() check still lacks an extra check of EIPV, like >>>> >>>> if ( !(gstatus & MCG_STATUS_RIPV) && >>>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>>> return -1; >>>> >>> >>> The RIPV is not related to the EIPV. RIPV means the context saved in >>> the stack can''t be restarted anymore. According to the SDM, RIPV >>> means "execution can be restarted reliably at the instruction >>> pointed to by the instruction pointer pushed on the stack". It''s not >>> about error happened synchronously or asynchronously. The point is, >>> if the program is running in hypervisor context, and RIPV tells us >>> that the program can''t be restarted, we can''t do anything but panic, >>> because we can''t switch context while we are in xen. So this code >>> have nothing to do with EIPV. >> >> I continue to disagree (including the statement in your other >> response): RIPV only tells us whether we can resume, not in which >> context the error occurred. EIPV tells us whether, by looking at the >> saved registers, we can determine the context that the error occurred >> in. Since with !RIPV >> we have to determine in what context the error occurred in order to >> decide whether to panic or just kill a guest, we can''t ignore EIPV >> (and if it''s not set we have to assume the worst case, since even if >> the registers indicate guest mode the error may have occurred in >> hypervisor context or accessing hypervisor structures [consider e.g. a >> data load error during a GDT access]). >> >> Jan > > Yes, I agree EIPV=0 may indicate async error, but I think your solution > *overkilled* most cases (i.e. the real guest instruction fetch error). > > Our idea is, > * xen mce would flush prefetched instruction so we can delay handle it > until if real need; > * a h/w error will not disappear, but if it was not being *consumed*, it''s > OK for system keep going (like SRAO error which do not need s/w handle > immediately); > > Suppose an async instruction fetch error (RIVP=EIVP=0), triggered at guest > context but instruction prefetch hypervisor context. The scenario is, > * at xen mce, the prefetched instruction has been flushed. xen mce handler > needn''t panic, instead it mark the page as broken page, then trigger vmce to > guest;If the prefetch was from Xen space (only in guest context), delivering a vMCE to the guest is pointless (and perhaps confusing to the guest).> * guest may kill app, kernel thread, guest itself, or whatever; > > The error is still an error, w/ 2 possibilities in the future: > 1. it may not be consumed as an SRAR error, system keep going, h/w > mechanism may detect a SRAO error (i.e. memroy scrub) at some time point and > handled then; > 2. it may be consumed at some time point and a SRAR error triggered again. > At this time, > 1). if srar occurred at hypervisor context, xen will panic. or, > 2). if srar occurred at guest context, xen kill the guest as a malicious > one (as what the 2nd patch do), and move the page to broken page list; > > Considering the rare possibility of the above case, I think it''s acceptable > to handle it in this way. > Thoughts?You''re only discussing instruction fetches (which can be discarded), but you''re not covering the other example I gave (GDT access from guest context - just like this is a ring-0 operations from the paging unit''s pov, this ought to be an out-of-context operation from MCE''s perspective). Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 11.10.11 at 10:15, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 08.10.11 at 10:29, "Jiang, Yunhong" <yunhong.jiang@intel.com> >>>>>> wrote: >>> >>>> >>>>> -----Original Message----- >>>>> From: Jan Beulich [mailto:JBeulich@suse.com] >>>>> Sent: Friday, September 30, 2011 3:25 PM >>>>> To: Liu, Jinsong; Jiang, Yunhong >>>>> Cc: keir.xen@gmail.com; xen-devel@lists.xensource.com >>>>> Subject: RE: [Xen-devel] [PATCH] X86 MCE: Add SRAR handler >>>>> >>>>>>>> On 30.09.11 at 04:51, "Jiang, Yunhong" >>>>>>>> <yunhong.jiang@intel.com> wrote: >>>>> >>>>>> >>>>>>> -----Original Message----- >>>>>>> From: Jan Beulich [mailto:JBeulich@suse.com] >>>>>>> This made me look at the current source, and there I see in >>>>>>> mce_urgent_action() >>>>>>> >>>>>>> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) >>>>>>> return -1; >>>>>>> >>>>>>> which I think should say ... _EIPV and use || instead. Thoughts? >>>>>> >>>>>> I think this code means, if the error happens in hypervisor mode >>>>>> (i.e. !guest_mode()), and RIPV indicate the RIP in stack can''t be >>>>>> restarted, we have to panic. >>>>> >>>>> Then the guest_mode() check still lacks an extra check of EIPV, >>>>> like >>>>> >>>>> if ( !(gstatus & MCG_STATUS_RIPV) && >>>>> (!(gstatus & MCG_STATUS_EIPV) || !guest_mode(regs))) >>>>> return -1; >>>>> >>>> >>>> The RIPV is not related to the EIPV. RIPV means the context saved >>>> in the stack can''t be restarted anymore. According to the SDM, RIPV >>>> means "execution can be restarted reliably at the instruction >>>> pointed to by the instruction pointer pushed on the stack". It''s >>>> not about error happened synchronously or asynchronously. The >>>> point is, if the program is running in hypervisor context, and >>>> RIPV tells us that the program can''t be restarted, we can''t do >>>> anything but panic, because we can''t switch context while we are >>>> in xen. So this code have nothing to do with EIPV. >>> >>> I continue to disagree (including the statement in your other >>> response): RIPV only tells us whether we can resume, not in which >>> context the error occurred. EIPV tells us whether, by looking at the >>> saved registers, we can determine the context that the error >>> occurred in. Since with !RIPV we have to determine in what context >>> the error occurred in order to decide whether to panic or just kill >>> a guest, we can''t ignore EIPV (and if it''s not set we have to >>> assume the worst case, since even if the registers indicate guest >>> mode the error may have occurred in hypervisor context or accessing >>> hypervisor structures [consider e.g. a data load error during a GDT >>> access]). >>> >>> Jan >> >> Yes, I agree EIPV=0 may indicate async error, but I think your >> solution *overkilled* most cases (i.e. the real guest instruction >> fetch error). >> >> Our idea is, >> * xen mce would flush prefetched instruction so we can delay >> handle it until if real need; >> * a h/w error will not disappear, but if it was not being >> *consumed*, it''s OK for system keep going (like SRAO error which do >> not need s/w handle immediately); >> >> Suppose an async instruction fetch error (RIVP=EIVP=0), triggered at >> guest context but instruction prefetch hypervisor context. The >> scenario is, * at xen mce, the prefetched instruction has been >> flushed. xen mce handler needn''t panic, instead it mark the page as >> broken page, then trigger vmce to guest; > > If the prefetch was from Xen space (only in guest context), > delivering a vMCE to the guest is pointless (and perhaps confusing to > the guest). >Yes, exactly. how about delay handle it as: * at mce isr if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) xen panic; * at mce softirq if ( (srar error) && (EIPV ==0) && (broken page owned by hypervisor) ) xen panic;>> * guest may kill app, kernel thread, guest itself, or whatever; >> >> The error is still an error, w/ 2 possibilities in the future: >> 1. it may not be consumed as an SRAR error, system keep going, h/w >> mechanism may detect a SRAO error (i.e. memroy scrub) at some time >> point and handled then; >> 2. it may be consumed at some time point and a SRAR error >> triggered again. At this time, 1). if srar occurred at hypervisor >> context, xen will panic. or, 2). if srar occurred at guest >> context, xen kill the guest as a malicious one (as what the 2nd >> patch do), and move the page to broken page list; >> >> Considering the rare possibility of the above case, I think it''s >> acceptable to handle it in this way. Thoughts? > > You''re only discussing instruction fetches (which can be discarded), > but you''re not covering the other example I gave (GDT access from > guest context - just like this is a ring-0 operations from the paging > unit''s pov, this ought to be an out-of-context operation from MCE''s > perspective). > > JanThat would be data load error (EIPV=1), a sync error. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 11.10.11 at 11:51, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >> If the prefetch was from Xen space (only in guest context), >> delivering a vMCE to the guest is pointless (and perhaps confusing to >> the guest). >> > > Yes, exactly. how about delay handle it as: > * at mce isr > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > xen panic; > * at mce softirq > if ( (srar error) && (EIPV ==0) && (broken page owned by hypervisor) ) > xen panic;Possible, but I''m not convinced.>>> * guest may kill app, kernel thread, guest itself, or whatever; >>> >>> The error is still an error, w/ 2 possibilities in the future: >>> 1. it may not be consumed as an SRAR error, system keep going, h/w >>> mechanism may detect a SRAO error (i.e. memroy scrub) at some time >>> point and handled then; >>> 2. it may be consumed at some time point and a SRAR error >>> triggered again. At this time, 1). if srar occurred at hypervisor >>> context, xen will panic. or, 2). if srar occurred at guest >>> context, xen kill the guest as a malicious one (as what the 2nd >>> patch do), and move the page to broken page list; >>> >>> Considering the rare possibility of the above case, I think it''s >>> acceptable to handle it in this way. Thoughts? >> >> You''re only discussing instruction fetches (which can be discarded), >> but you''re not covering the other example I gave (GDT access from >> guest context - just like this is a ring-0 operations from the paging >> unit''s pov, this ought to be an out-of-context operation from MCE''s >> perspective). > > That would be data load error (EIPV=1), a sync error.If indeed implemented that way in hardware, that would make the handling ambiguous: A GDT access must not (unconditionally) be attributed to the (pv) guest, as it is not a problem the guest can (necessarily) deal with (considering the split page ownership of what constitutes the GDT under Xen, the guest should only be accountable for the non-reserved part of the GDT, the rest should be attributed back to Xen). The same would go for (perhaps speculative) page table walks. Furthermore, data prefetching is possible too - how would a problem there get reported? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich wrote:>>>> On 11.10.11 at 11:51, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>> If the prefetch was from Xen space (only in guest context), >>> delivering a vMCE to the guest is pointless (and perhaps confusing >>> to the guest). >>> >> >> Yes, exactly. how about delay handle it as: >> * at mce isr >> if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) xen panic; >> * at mce softirq >> if ( (srar error) && (EIPV ==0) && (broken page owned by >> hypervisor) ) xen panic; > > Possible, but I''m not convinced. > >>>> * guest may kill app, kernel thread, guest itself, or whatever; >>>> >>>> The error is still an error, w/ 2 possibilities in the future: >>>> 1. it may not be consumed as an SRAR error, system keep going, >>>> h/w mechanism may detect a SRAO error (i.e. memroy scrub) at some >>>> time point and handled then; >>>> 2. it may be consumed at some time point and a SRAR error >>>> triggered again. At this time, 1). if srar occurred at >>>> hypervisor context, xen will panic. or, 2). if srar occurred at >>>> guest >>>> context, xen kill the guest as a malicious one (as what the 2nd >>>> patch do), and move the page to broken page list; >>>> >>>> Considering the rare possibility of the above case, I think it''s >>>> acceptable to handle it in this way. Thoughts? >>> >>> You''re only discussing instruction fetches (which can be discarded), >>> but you''re not covering the other example I gave (GDT access from >>> guest context - just like this is a ring-0 operations from the >>> paging unit''s pov, this ought to be an out-of-context operation >>> from MCE''s perspective). >> >> That would be data load error (EIPV=1), a sync error. > > If indeed implemented that way in hardware, that would make the > handling ambiguous: A GDT access must not (unconditionally) be > attributed to the (pv) guest, as it is not a problem the guest can > (necessarily) deal with (considering the split page ownership of > what constitutes the GDT under Xen, the guest should only be > accountable for the non-reserved part of the GDT, the rest should > be attributed back to Xen). > > The same would go for (perhaps speculative) page table walks. >Seems not ambiguous here: who own, who take. If error caused by hypervisor access broken page, xen panic; If error caused by guest access, guest would handle it (I guess normally kill itself); If guest maliciously access again, it would be killed by hypervisor.> Furthermore, data prefetching is possible too - how would a problem > there get reported? >It may be reported as unkown error, or nothing, but not as srar data load error w/ EIPV=1. Thanks, Jinsong _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 11.10.11 at 13:58, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 11.10.11 at 11:51, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> That would be data load error (EIPV=1), a sync error. >> >> If indeed implemented that way in hardware, that would make the >> handling ambiguous: A GDT access must not (unconditionally) be >> attributed to the (pv) guest, as it is not a problem the guest can >> (necessarily) deal with (considering the split page ownership of >> what constitutes the GDT under Xen, the guest should only be >> accountable for the non-reserved part of the GDT, the rest should >> be attributed back to Xen). >> >> The same would go for (perhaps speculative) page table walks. >> > > Seems not ambiguous here: who own, who take. > If error caused by hypervisor access broken page, xen panic; > If error caused by guest access, guest would handle it (I guess normally > kill itself);If a guest accesses the hypervisor part of the GDT or page tables, or some other shared data structure owned by the hypervisor (like the M2P table), its handler may get utterly confused by being presented an address it doesn''t own and knows nothing about (i.e. in violation of your "who own, who take"). And even from a theoretical pov, a hypervisor should panic if one of its data structures got corrupted, no matter whether that was due to its own or a guest''s access. Delaying the panic here will only lead to the situation becoming worse. (The same would go for a "normal" kernel: If an application causes an MCE due to e.g. a GDT access, it shouldn''t be just the application that gets killed. Of course, with the interesting GDT descriptors all being located close to each other, there''s little chance the kernel would be able to handle that, but that''s an implementation aspect of the kernel, not something that should matter to the hardware design.) Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan, I update a little for my former patch, as attached. For my former patch, you mainly have 2 concerns (list below). I double check xen mce code, w/ my opinion append: Concern 1: for SRAR IFU error, since RIPV=EIPV=0, it maybe an async error which occur at guest but root from hypervisor. [Jinsong]: Yes, but EIPV didn''t tell us where the error root from (it''s just a hint, warning us async possibility). It no need to overkill xen at mce isr, instead, at mce softirq we can find out error root location and then handle accordingly: * at mce isr: /* a total insurance */ /* if error is async, we delay handle it at mce softirq */ if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) return -1; * at mce softirq: /* detect error location by bank->mc_addr */ /* handle different page OWNER cases at intel_memerr_dhandler() and offline_page() */ /* who own, who take */ if (error page owner is guest) trigger vmce to guest; else panic xen; Concern 2: If a guest accesses the hypervisor part of the GDT or page tables, or some other shared data structure owned by the hypervisor (like the M2P table), its handler may get utterly confused by being presented an address it doesn''t own and knows nothing about. [Jinsong]: for such cases, page owner would be dom_xen/dom_cow or NULL, but not guest --> it would be handled at hypervisor, not triggering vmce to guest --> who own, who take. Thanks, Jinsong ============================X86 MCE: Add SRAR handler Currently Intel SDM add 2 kinds of MCE SRAR errors: 1). Data Load error, error code = 0x134 2). Instruction Fetch error, error code = 0x150 This patch add handler to these new SRAR errors. It based on existed mce infrastructure, add code to handle SRAR specific error. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> diff -r 1515484353c6 xen/arch/x86/cpu/mcheck/mce_intel.c --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Oct 13 10:09:28 2011 +0200 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Sat Oct 22 01:40:41 2011 +0800 @@ -37,6 +37,14 @@ static int __read_mostly nr_intel_ext_ms */ #define INTEL_SRAO_MEM_SCRUB 0xC0 ... 0xCF #define INTEL_SRAO_L3_EWB 0x17A + +/* + * Currently Intel SDM define 2 kinds of srar errors: + * 1). Data Load error, error code = 0x134 + * 2). Instruction Fetch error, error code = 0x150 + */ +#define INTEL_SRAR_DATA_LOAD 0x134 +#define INTEL_SRAR_INSTR_FETCH 0x150 /* Thermal Hanlding */ #ifdef CONFIG_X86_MCE_THERMAL @@ -256,7 +264,7 @@ static enum mce_result mce_action(struct for ( i = 0; i < handler_num; i++ ) { if (handlers[i].owned_error(binfo.mib->mc_status)) { - handlers[i].recovery_handler(&binfo, &bank_result); + handlers[i].recovery_handler(&binfo, &bank_result, regs); if (worst_result < bank_result) worst_result = bank_result; break; @@ -622,7 +630,8 @@ struct mcinfo_recovery *mci_add_pageoff_ static void intel_memerr_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { struct mcinfo_bank *bank = binfo->mib; struct mcinfo_global *global = binfo->mig; @@ -721,6 +730,32 @@ vmce_failed: } } +static int intel_srar_check(uint64_t status) +{ + return ( intel_check_mce_type(status) == intel_mce_ucr_srar ); +} + +static void intel_srar_dhandler( + struct mca_binfo *binfo, + enum mce_result *result, + struct cpu_user_regs *regs) +{ + uint64_t status = binfo->mib->mc_status; + + /* For unknown srar error code, reset system */ + *result = MCER_RESET; + + switch ( status & INTEL_MCCOD_MASK ) + { + case INTEL_SRAR_DATA_LOAD: + case INTEL_SRAR_INSTR_FETCH: + intel_memerr_dhandler(binfo, result, regs); + break; + default: + break; + } +} + static int intel_srao_check(uint64_t status) { return ( intel_check_mce_type(status) == intel_mce_ucr_srao ); @@ -728,7 +763,8 @@ static int intel_srao_check(uint64_t sta static void intel_srao_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { uint64_t status = binfo->mib->mc_status; @@ -741,7 +777,7 @@ static void intel_srao_dhandler( { case INTEL_SRAO_MEM_SCRUB: case INTEL_SRAO_L3_EWB: - intel_memerr_dhandler(binfo, result); + intel_memerr_dhandler(binfo, result, regs); break; default: break; @@ -756,14 +792,15 @@ static int intel_default_check(uint64_t static void intel_default_mce_dhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs * regs) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; type = intel_check_mce_type(status); - if (type == intel_mce_fatal || type == intel_mce_ucr_srar) + if (type == intel_mce_fatal) *result = MCER_RESET; else *result = MCER_CONTINUE; @@ -771,12 +808,14 @@ static void intel_default_mce_dhandler( static const struct mca_error_handler intel_mce_dhandlers[] = { {intel_srao_check, intel_srao_dhandler}, + {intel_srar_check, intel_srar_dhandler}, {intel_default_check, intel_default_mce_dhandler} }; static void intel_default_mce_uhandler( struct mca_binfo *binfo, - enum mce_result *result) + enum mce_result *result, + struct cpu_user_regs *regs) { uint64_t status = binfo->mib->mc_status; enum intel_mce_type type; @@ -785,8 +824,6 @@ static void intel_default_mce_uhandler( switch (type) { - /* Panic if no handler for SRAR error */ - case intel_mce_ucr_srar: case intel_mce_fatal: *result = MCER_RESET; break; @@ -961,10 +998,8 @@ static int intel_recoverable_scan(u64 st /* SRAR error */ else if ( ser_support && !(status & MCi_STATUS_OVER) && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S) - && (status & MCi_STATUS_AR) ) { - mce_printk(MCE_VERBOSE, "MCE: No SRAR error defined currently.\n"); - return 0; - } + && (status & MCi_STATUS_AR) && (status & MCi_STATUS_EN) ) + return 1; /* SRAO error */ else if (ser_support && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) diff -r 1515484353c6 xen/arch/x86/cpu/mcheck/x86_mca.h --- a/xen/arch/x86/cpu/mcheck/x86_mca.h Thu Oct 13 10:09:28 2011 +0200 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sat Oct 22 01:40:41 2011 +0800 @@ -151,7 +151,7 @@ struct mca_error_handler */ int (*owned_error)(uint64_t status); void (*recovery_handler)(struct mca_binfo *binfo, - enum mce_result *result); + enum mce_result *result, struct cpu_user_regs *regs); }; /* Global variables */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 21.10.11 at 22:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > I update a little for my former patch, as attached. > For my former patch, you mainly have 2 concerns (list below). I double check > xen mce code, w/ my opinion append:Those concerns were really just triggered by the patch, not directly related to it. The patch itself looks okay to me.> Concern 1: for SRAR IFU error, since RIPV=EIPV=0, it maybe an async error > which occur at guest but root from hypervisor. > [Jinsong]: > Yes, but EIPV didn''t tell us where the error root from (it''s just a > hint, warning us async possibility). > It no need to overkill xen at mce isr, instead, at mce softirq we can > find out error root location and then handle accordingly: > * at mce isr: > /* a total insurance */ > /* if error is async, we delay handle it at mce softirq */ > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > return -1;I continue to think that guest_mode() must not be used without EIPV, no matter what the purpose of the use.> * at mce softirq: > /* detect error location by bank->mc_addr */ > /* handle different page OWNER cases at intel_memerr_dhandler() > and offline_page() */ > /* who own, who take */ > if (error page owner is guest) > trigger vmce to guest; > else > panic xen;That part is certainly fine.> Concern 2: If a guest accesses the hypervisor part of the GDT or page > tables, or some other shared data structure owned by the hypervisor (like the > M2P table), its handler may get utterly confused by being presented an > address it doesn''t own and knows nothing about. > [Jinsong]: for such cases, page owner would be dom_xen/dom_cow or NULL, but > not guest --> it would be handled at hypervisor, not triggering vmce to guest --> > who own, who take.That latter part of your explanation is fine too, but with the caveat that the bogus use of guest_mode() above may have an overall effect on the behavior. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> > Concern 1: for SRAR IFU error, since RIPV=EIPV=0, it maybe an asyncerror> > which occur at guest but root from hypervisor. > > [Jinsong]: > > Yes, but EIPV didn''t tell us where the error root from (it''s just a > > hint, warning us async possibility). > > It no need to overkill xen at mce isr, instead, at mce softirq wecan> > find out error root location and then handle accordingly: > > * at mce isr: > > /* a total insurance */ > > /* if error is async, we delay handle it at mce softirq */ > > if ( !(gstatus & MCG_STATUS_RIPV) && !guest_mode(regs)) > > return -1; > > I continue to think that guest_mode() must not be used without > EIPV, no matter what the purpose of the use.Jan, as explained before, this is purely a sanity check to make sure we can go to next level error handling, and the check for guest_mode have nothing to do with where the error happen, but just make sure that if the context in the stack is for hypervisor mode, and RIPV told us that we can''t switch context, then we can''t do anything more but just panic since it''s complexity to pre-emption/ context switch in hypervisor mode. Thanks --jyh _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel