commit 420bacd209e31917fd732ef3c1aeae03d6d14d18 Author: Liu Jinsong <jinsong.liu@intel.com> Date: Sat Nov 16 06:15:11 2013 +0800 X86: Fix vcpu xsave bug When nonlazy xstates used, it should be xsaved though lazy xstates are not dirty. Signed-off-by: Liu Jinsong <jinsong.liu@intel.com> diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 7649274..f1d2ccc 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) /* FPU Save Functions */ /*******************************/ /* Save x87 extended state */ -static inline void fpu_xsave(struct vcpu *v) +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) { bool_t ok; @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) */ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); + xsave(v, mask); ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) */ void vcpu_save_fpu(struct vcpu *v) { - if ( !v->fpu_dirtied ) - return; - ASSERT(!is_idle_vcpu(v)); - /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ + /* Avoid recursion */ clts(); - - if ( cpu_has_xsave ) - fpu_xsave(v); - else if ( cpu_has_fxsr ) - fpu_fxsave(v); + if ( !v->fpu_dirtied ) + { + if ( v->arch.nonlazy_xstate_used ) + { + ASSERT(cpu_has_xsave); + fpu_xsave(v, XSTATE_NONLAZY); + } + } else - fpu_fsave(v); + { + if ( cpu_has_xsave ) + fpu_xsave(v, XSTATE_ALL); + else if ( cpu_has_fxsr ) + fpu_fxsave(v); + else + fpu_fsave(v); - v->fpu_dirtied = 0; + v->fpu_dirtied = 0; + } stts(); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 15/11/13 16:55, Liu, Jinsong wrote: > commit 420bacd209e31917fd732ef3c1aeae03d6d14d18 > Author: Liu Jinsong <jinsong.liu@intel.com> > Date: Sat Nov 16 06:15:11 2013 +0800 > > X86: Fix vcpu xsave bug > > When nonlazy xstates used, it should be xsaved though lazy xstates are not dirty. > > Signed-off-by: Liu Jinsong <jinsong.liu@intel.com> Do you mean "...xsaved as though lazy..."? Are you stating that currently, lazy states are not actually dirty? Furthermore, can you describe why, and what goes wrong if you don't? It is not obvious why this change is needed. > > diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c > index 7649274..f1d2ccc 100644 > --- a/xen/arch/x86/i387.c > +++ b/xen/arch/x86/i387.c > @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) > /* FPU Save Functions */ > /*******************************/ > /* Save x87 extended state */ > -static inline void fpu_xsave(struct vcpu *v) > +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) > { > bool_t ok; > > @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) > */ > ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); > ASSERT(ok); > - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); > + xsave(v, mask); > ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); > ASSERT(ok); > } > @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) > */ > void vcpu_save_fpu(struct vcpu *v) > { > - if ( !v->fpu_dirtied ) > - return; > - > ASSERT(!is_idle_vcpu(v)); > > - /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ > + /* Avoid recursion */ Why are you changing this comment? 
~Andrew> clts(); > - > - if ( cpu_has_xsave ) > - fpu_xsave(v); > - else if ( cpu_has_fxsr ) > - fpu_fxsave(v); > + if ( !v->fpu_dirtied ) > + { > + if ( v->arch.nonlazy_xstate_used ) > + { > + ASSERT(cpu_has_xsave); > + fpu_xsave(v, XSTATE_NONLAZY); > + } > + } > else > - fpu_fsave(v); > + { > + if ( cpu_has_xsave ) > + fpu_xsave(v, XSTATE_ALL); > + else if ( cpu_has_fxsr ) > + fpu_fxsave(v); > + else > + fpu_fsave(v); > > - v->fpu_dirtied = 0; > + v->fpu_dirtied = 0; > + } > stts(); > } > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
No, what I meant is, nonlazy xstates should be xsaved each time vcpu_save_fpu. Operation to nonlazy xstates will not trigger #NM exception, so whenever vcpu scheduled in it should get restored and whenever vcpu scheduled out it should get saved. As for the comments I just want to unify it with other ''clts'' comments. Thanks, Jinsong ________________________________ From: Andrew Cooper [mailto:andrew.cooper3@citrix.com] Sent: Saturday, November 16, 2013 1:52 AM To: Liu, Jinsong Cc: Jan Beulich; xen-devel@lists.xen.org Subject: Re: [Xen-devel] [PATCH] X86: Fix vcpu xsave bug On 15/11/13 16:55, Liu, Jinsong wrote: commit 420bacd209e31917fd732ef3c1aeae03d6d14d18 Author: Liu Jinsong <jinsong.liu@intel.com><mailto:jinsong.liu@intel.com> Date: Sat Nov 16 06:15:11 2013 +0800 X86: Fix vcpu xsave bug When nonlazy xstates used, it should be xsaved though lazy xstates are not dirty. Signed-off-by: Liu Jinsong <jinsong.liu@intel.com><mailto:jinsong.liu@intel.com> Do you mean "...xsaved as though lazy...", are you stating that currently, lazy states are not actually dirty? Furthermore, can you describe why, and what goes wrong if you dont? It is not obvious why this change is needed. diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 7649274..f1d2ccc 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) /* FPU Save Functions */ /*******************************/ /* Save x87 extended state */ -static inline void fpu_xsave(struct vcpu *v) +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) { bool_t ok; @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) */ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? 
XSTATE_ALL : XSTATE_LAZY); + xsave(v, mask); ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) */ void vcpu_save_fpu(struct vcpu *v) { - if ( !v->fpu_dirtied ) - return; - ASSERT(!is_idle_vcpu(v)); - /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ + /* Avoid recursion */ Why are you changing this comment? ~Andrew clts(); - - if ( cpu_has_xsave ) - fpu_xsave(v); - else if ( cpu_has_fxsr ) - fpu_fxsave(v); + if ( !v->fpu_dirtied ) + { + if ( v->arch.nonlazy_xstate_used ) + { + ASSERT(cpu_has_xsave); + fpu_xsave(v, XSTATE_NONLAZY); + } + } else - fpu_fsave(v); + { + if ( cpu_has_xsave ) + fpu_xsave(v, XSTATE_ALL); + else if ( cpu_has_fxsr ) + fpu_fxsave(v); + else + fpu_fsave(v); - v->fpu_dirtied = 0; + v->fpu_dirtied = 0; + } stts(); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org<mailto:Xen-devel@lists.xen.org> http://lists.xen.org/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> On 15.11.13 at 19:52, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: Please don't top-post. > No, what I meant is, nonlazy xstates should be xsaved each time > vcpu_save_fpu. > Operation to nonlazy xstates will not trigger #NM exception, so whenever > vcpu scheduled in it should get restored and whenever vcpu scheduled out it > should get saved. So you're saying that AMD's LWP would be broken in that respect currently too. Hence in any event this needs to be coordinated with AMD, and the description needs improving. > As for the comments I just want to unify it with other 'clts' comments. But you're making an admittedly non-optimal comment completely wrong - there's no recursion involved in triggering #NM there. Jan
>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > --- a/xen/arch/x86/i387.c > +++ b/xen/arch/x86/i387.c > @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) > /* FPU Save Functions */ > /*******************************/ > /* Save x87 extended state */ > -static inline void fpu_xsave(struct vcpu *v) > +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) You get v passed here, so no need to add a new parameter. > @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) > */ > ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); > ASSERT(ok); > - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); > + xsave(v, mask); Instead, you can check v->fpu_dirtied here. > @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) > */ > void vcpu_save_fpu(struct vcpu *v) > { > - if ( !v->fpu_dirtied ) > - return; > - And - afaict - the only change needed to this function is the deletion above. Jan
Jan Beulich wrote: >>>> On 15.11.13 at 19:52, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > > Please don't to-post. > >> No, what I meant is, nonlazy xstates should be xsaved each time >> vcpu_save_fpu. Operation to nonlazy xstates will not trigger #NM >> exception, so whenever vcpu scheduled in it should get restored and >> whenever vcpu scheduled out it should get saved. > > So you're saying that AMD's LWP would be broken in that respect > currently too. Hence in any event this needs to be coordinated > with AMD, and the description needs improving. Yes, and I will update the description. > >> As for the comments I just want to unify it with other 'clts' >> comments. > > But you're making an admittedly non-optimal comment completely > wrong - there's no recursion involved in triggering #NM there. > OK, I will keep the original comment. Thanks, Jinsong
Jan Beulich wrote: >>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> --- a/xen/arch/x86/i387.c >> +++ b/xen/arch/x86/i387.c >> @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) >> /* FPU Save Functions */ >> /*******************************/ >> /* Save x87 extended state */ >> -static inline void fpu_xsave(struct vcpu *v) >> +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) > > You get v passed here, so no need to add a new parameter. > >> @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) >> */ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); >> ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : >> XSTATE_LAZY); + xsave(v, mask); > > Instead, you can check v->fpu_dirtied here. OK, I will remove the mask parameter. > >> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) */ >> void vcpu_save_fpu(struct vcpu *v) >> { >> - if ( !v->fpu_dirtied ) >> - return; >> - > > And the - afaict - the only changed needed to this function is the > deletion above. > If I didn't misunderstand your meaning: we cannot just delete these 2 lines, can we? Consider, say, the case of (!v->fpu_dirtied) on an old platform that does fpu_fxsave/fpu_fsave. Thanks, Jinsong
>>> On 18.11.13 at 11:35, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) */ >>> void vcpu_save_fpu(struct vcpu *v) >>> { >>> - if ( !v->fpu_dirtied ) >>> - return; >>> - >> >> And the - afaict - the only changed needed to this function is the >> deletion above. >> > > If I didn't misunderstand your meaning, it can not only delete these 2 > lines, say, when (!v->fpu_dirtied) and in old platform that do > fpu_fxsave/fpu_fsave? Sorry, I don't understand what you're asking. Jan
Jan Beulich wrote:>>>> On 18.11.13 at 11:35, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) >>>> */ void vcpu_save_fpu(struct vcpu *v) >>>> { >>>> - if ( !v->fpu_dirtied ) >>>> - return; >>>> - >>> >>> And the - afaict - the only changed needed to this function is the >>> deletion above. >>> >> >> If I didn''t misunderstand your meaning, it can not only delete these >> 2 lines, say, when (!v->fpu_dirtied) and in old platform that do >> fpu_fxsave/fpu_fsave? > > Sorry, I don''t understand what you''re asking. >The problem is I don''t understand your last comments: ''And the - afaict - the only changed needed to this function is the deletion above.'' Seems some misunderstanding here :) So would you please give me the code of your thought based on the patch below? Thanks, Jinsong ====================== diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 7649274..f1d2ccc 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -134,7 +134,7 @@ static inline void fpu_frstor(struct vcpu *v) /* FPU Save Functions */ /*******************************/ /* Save x87 extended state */ -static inline void fpu_xsave(struct vcpu *v) +static inline void fpu_xsave(struct vcpu *v, uint64_t mask) { bool_t ok; @@ -145,7 +145,7 @@ static inline void fpu_xsave(struct vcpu *v) */ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); + xsave(v, mask); ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); ASSERT(ok); } @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) */ void vcpu_save_fpu(struct vcpu *v) { - if ( !v->fpu_dirtied ) - return; - ASSERT(!is_idle_vcpu(v)); - /* This can happen, if a paravirtualised guest OS has set its CR0.TS. 
*/ + /* Avoid recursion */ clts(); - - if ( cpu_has_xsave ) - fpu_xsave(v); - else if ( cpu_has_fxsr ) - fpu_fxsave(v); + if ( !v->fpu_dirtied ) + { + if ( v->arch.nonlazy_xstate_used ) + { + ASSERT(cpu_has_xsave); + fpu_xsave(v, XSTATE_NONLAZY); + } + } else - fpu_fsave(v); + { + if ( cpu_has_xsave ) + fpu_xsave(v, XSTATE_ALL); + else if ( cpu_has_fxsr ) + fpu_fxsave(v); + else + fpu_fsave(v); - v->fpu_dirtied = 0; + v->fpu_dirtied = 0; + } stts(); } -- 1.7.1
>>> On 18.11.13 at 13:24, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 18.11.13 at 11:35, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> Jan Beulich wrote: >>>>>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) >>>>> */ void vcpu_save_fpu(struct vcpu *v) >>>>> { >>>>> - if ( !v->fpu_dirtied ) >>>>> - return; >>>>> - >>>> >>>> And the - afaict - the only changed needed to this function is the >>>> deletion above. >>>> >>> >>> If I didn't misunderstand your meaning, it can not only delete these >>> 2 lines, say, when (!v->fpu_dirtied) and in old platform that do >>> fpu_fxsave/fpu_fsave? >> >> Sorry, I don't understand what you're asking. >> > > The problem is I don't understand your last comments: > 'And the - afaict - the only changed needed to this function is the deletion > above.' > > Seems some misunderstanding here :) > So would you please give me the code of your thought based on the patch > below? void vcpu_save_fpu(struct vcpu *v) { ASSERT(!is_idle_vcpu(v)); /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ clts(); if ( cpu_has_xsave ) fpu_xsave(v); else if ( !v->fpu_dirtied ) /* nothing */; else if ( cpu_has_fxsr ) fpu_fxsave(v); else fpu_fsave(v); v->fpu_dirtied = 0; stts(); } Of course this - as much as your earlier variant - has the downside of there being a path consisting of just a clts()/stts(), and it would clearly be nice to avoid that. Jan
Jan Beulich wrote: >>>> On 18.11.13 at 13:24, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 18.11.13 at 11:35, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> Jan Beulich wrote: >>>>>>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>> wrote: >>>>>> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) >>>>>> */ void vcpu_save_fpu(struct vcpu *v) >>>>>> { >>>>>> - if ( !v->fpu_dirtied ) >>>>>> - return; >>>>>> - >>>>> >>>>> And the - afaict - the only changed needed to this function is >>>>> the deletion above. >>>>> >>>> >>>> If I didn't misunderstand your meaning, it can not only delete >>>> these 2 lines, say, when (!v->fpu_dirtied) and in old platform >>>> that do fpu_fxsave/fpu_fsave? >>> >>> Sorry, I don't understand what you're asking. >>> >> >> The problem is I don't understand your last comments: >> 'And the - afaict - the only changed needed to this function is the >> deletion above.' >> >> Seems some misunderstanding here :) >> So would you please give me the code of your thought based on the >> patch below? > > void vcpu_save_fpu(struct vcpu *v) > { > ASSERT(!is_idle_vcpu(v)); > > /* This can happen, if a paravirtualised guest OS has set its > CR0.TS. */ clts(); > > if ( cpu_has_xsave ) > fpu_xsave(v); > else if ( !v->fpu_dirtied ) > /* nothing */; > else if ( cpu_has_fxsr ) > fpu_fxsave(v); > else > fpu_fsave(v); > > v->fpu_dirtied = 0; > stts(); > } > But then we need to add logic to fpu_xsave(), like: if ( v->fpu_dirtied ) { if ( v->arch.nonlazy_xstate_used ) mask = XSTATE_ALL; else mask = XSTATE_LAZY; } else { if ( v->arch.nonlazy_xstate_used ) mask = XSTATE_NONLAZY; else mask = 0; } xsave(v, mask); This way (new vcpu_save_fpu + new fpu_xsave) is somewhat obscure, and calling fpu_xsave may do nothing. So how about keeping the old patch and using 'mask' to directly tell fpu_xsave what we want it to save? Thanks, Jinsong
Liu, Jinsong wrote:> Jan Beulich wrote: >>>>> On 18.11.13 at 13:24, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>> wrote: >>> Jan Beulich wrote: >>>>>>> On 18.11.13 at 11:35, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> Jan Beulich wrote: >>>>>>>>> On 15.11.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>>> wrote: >>>>>>> @@ -257,22 +257,29 @@ void vcpu_restore_fpu_lazy(struct vcpu *v) >>>>>>> */ void vcpu_save_fpu(struct vcpu *v) >>>>>>> { >>>>>>> - if ( !v->fpu_dirtied ) >>>>>>> - return; >>>>>>> - >>>>>> >>>>>> And the - afaict - the only changed needed to this function is >>>>>> the deletion above. >>>>>> >>>>> >>>>> If I didn''t misunderstand your meaning, it can not only delete >>>>> these 2 lines, say, when (!v->fpu_dirtied) and in old platform >>>>> that do fpu_fxsave/fpu_fsave? >>>> >>>> Sorry, I don''t understand what you''re asking. >>>> >>> >>> The problem is I don''t understand your last comments: >>> ''And the - afaict - the only changed needed to this function is the >>> deletion above.'' >>> >>> Seems some misunderstanding here :) >>> So would you please give me the code of your thought based on the >>> patch below? >> >> void vcpu_save_fpu(struct vcpu *v) >> { >> ASSERT(!is_idle_vcpu(v)); >> >> /* This can happen, if a paravirtualised guest OS has set its >> CR0.TS. */ clts(); >> >> if ( cpu_has_xsave ) >> fpu_xsave(v); >> else if ( !v->fpu_dirtied ) >> /* nothing */; >> else if ( cpu_has_fxsr ) >> fpu_fxsave(v); >> else >> fpu_fsave(v); >> >> v->fpu_dirtied = 0; >> stts(); >> } >> > > But that we need add logic at fpu_xsave(), like > > if ( v->fpu_dirtied ) > { > if ( v->arch.nonlazy_xstate_used ) > mask = XSTATE_ALL; > else > mask = XSTATE_LAZY; > } > else > { > if ( v->arch.nonlazy_xstate_used ) > mask = XSTATE_NONLAZY; > else > mask = 0; > } > > xsave(v, mask); > > This way (new vcpu_save_fpu + new fpu_xsave) is some obscure, and > calling fpu_xsave may do nothing. 
So how about keep old patch, use > 'mask' to directly tell fpu_xsave what we want it to save? > Hmm, I just found that my old patch also has an issue: it didn't handle the XSTATE_LAZY case ( v->fpu_dirtied && !v->arch.nonlazy_xstate_used ). I will update it based on your approach. Thanks, Jinsong