<suravee.suthikulpanit@amd.com>
2013-Mar-01 20:49 UTC
[PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Currently, the performance counter registers are saved/restored on an
HVM guest VCPU switch only if the counters are running. However, perf
has a check that writes an MSR and reads the value back to verify that
the MSR is working. This check has been shown to fail when the VCPU is
migrated between the wrmsr and the rdmsr, so the two values differ.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/arch/x86/hvm/svm/vpmu.c | 62 ++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index bf186fe..4854cf3 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -172,12 +172,16 @@ static inline void context_restore(struct vcpu *v)
     {
         wrmsrl(counters[i], ctxt->counters[i]);
 
+        if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+            continue;
+
         /* Force an interrupt to allow guest reset the counter,
            if the value is positive */
         if ( is_overflowed(ctxt->counters[i]) && (ctxt->counters[i] > 0) )
         {
             gdprintk(XENLOG_WARNING, "VPMU: Force a performance counter "
-                "overflow interrupt!\n");
+                "overflow interrupt! (counter:%u value:0x%lx)\n",
+                i, ctxt->counters[i]);
             amd_vpmu_do_interrupt(0);
         }
     }
@@ -188,12 +192,13 @@ static void amd_vpmu_restore(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctxt = vpmu->context;
 
-    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
-           vpmu_is_set(vpmu, VPMU_RUNNING)) )
+    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
         return;
 
     context_restore(v);
-    apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
+
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+        apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
 }
 
 static inline void context_save(struct vcpu *v)
@@ -214,13 +219,16 @@ static void amd_vpmu_save(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctx = vpmu->context;
 
-    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
-           vpmu_is_set(vpmu, VPMU_RUNNING)) )
+    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
         return;
 
     context_save(v);
-    ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
-    apic_write(APIC_LVTPC, ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
+
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+    {
+        ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+        apic_write(APIC_LVTPC, ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
+    }
 }
 
 static void context_update(unsigned int msr, u64 msr_content)
@@ -303,25 +311,25 @@ static int amd_vpmu_initialise(struct vcpu *v)
 
     if ( counters == NULL )
     {
-       switch ( family )
-       {
-       case 0x15:
-               num_counters = F15H_NUM_COUNTERS;
-               counters = AMD_F15H_COUNTERS;
-               ctrls = AMD_F15H_CTRLS;
-               k7_counters_mirrored = 1;
-               break;
-       case 0x10:
-       case 0x12:
-       case 0x14:
-       case 0x16:
-       default:
-               num_counters = F10H_NUM_COUNTERS;
-               counters = AMD_F10H_COUNTERS;
-               ctrls = AMD_F10H_CTRLS;
-               k7_counters_mirrored = 0;
-               break;
-       }
+    switch ( family )
+    {
+    case 0x15:
+        num_counters = F15H_NUM_COUNTERS;
+        counters = AMD_F15H_COUNTERS;
+        ctrls = AMD_F15H_CTRLS;
+        k7_counters_mirrored = 1;
+        break;
+    case 0x10:
+    case 0x12:
+    case 0x14:
+    case 0x16:
+    default:
+        num_counters = F10H_NUM_COUNTERS;
+        counters = AMD_F10H_COUNTERS;
+        ctrls = AMD_F10H_CTRLS;
+        k7_counters_mirrored = 0;
+        break;
+    }
     }
 
     ctxt = xzalloc(struct amd_vpmu_context);
-- 
1.7.10.4
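[For context: the perf check the commit message refers to is a
write/read-back probe of a counter MSR. The sketch below is illustrative
only -- the MSR accessors are hypothetical stand-ins, not the actual
kernel code -- and shows why a VCPU migration between the two accesses
makes the check fail when the counters are not restored:]

/* Illustrative sketch of the shape of the perf sanity check that trips
 * over the bug.  wrmsr_check()/rdmsr_check() are hypothetical stand-ins
 * for the guest's MSR accessors; each one traps to the hypervisor. */
#include <stdint.h>

#define MSR_K7_PERFCTR0 0xc0010004   /* AMD performance counter 0 */

extern void wrmsr_check(uint32_t msr, uint64_t val);
extern uint64_t rdmsr_check(uint32_t msr);

static int counter_msr_works(void)
{
    const uint64_t probe = 0xabcdUL;

    wrmsr_check(MSR_K7_PERFCTR0, probe);
    /*
     * If the VCPU is descheduled here and resumes on another physical
     * CPU, pre-patch Xen restored the counter MSRs only when
     * VPMU_RUNNING was set -- so the value just written is lost ...
     */
    return rdmsr_check(MSR_K7_PERFCTR0) == probe;  /* ... and this fails. */
}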
Boris Ostrovsky
2013-Mar-01 23:02 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 03/01/2013 03:49 PM, suravee.suthikulpanit@amd.com wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Currently, the performance counter registers are saved/restored on an
> HVM guest VCPU switch only if the counters are running. However, perf
> has a check that writes an MSR and reads the value back to verify that
> the MSR is working. This check has been shown to fail when the VCPU is
> migrated between the wrmsr and the rdmsr, so the two values differ.
>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Description may need to be cleaned up a bit, but other than that

Acked-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

[quoted patch trimmed]
George Dunlap
2013-Mar-04 12:42 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On Fri, Mar 1, 2013 at 8:49 PM, <suravee.suthikulpanit@amd.com> wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Currently, the performance counter registers are saved/restored on an
> HVM guest VCPU switch only if the counters are running. However, perf
> has a check that writes an MSR and reads the value back to verify that
> the MSR is working. This check has been shown to fail when the VCPU is
> migrated between the wrmsr and the rdmsr, so the two values differ.

Many moons ago (circa 2005) when I used performance counters, I found
that adding them to the save/restore path added a non-negligible
overhead -- something like a 5% slow-down. Do you have any reason to
believe this is no longer the case? Have you done any benchmarks
before and after?

If there is a performance slow-down, you may have to implement
something like the "lazy FPU" save/restore, where you remove access to
the VPMU MSRs to detect that the guest is accessing them.

 -George
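[The "lazy FPU"-style scheme suggested above would look roughly like
this. A sketch only: context_save()/context_restore() exist in
xen/arch/x86/hvm/svm/vpmu.c, but the VPMU_LOADED flag and the
vpmu_msr_intercept_*() helpers are hypothetical names for illustration:]

/* Sketch of lazy VPMU context switching: keep the PMU MSRs intercepted
 * after a VCPU is scheduled in, and only load the guest's PMU state
 * (and open up direct access) on the first real guest access. */

static void vpmu_lazy_sched_out(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu_is_set(vpmu, VPMU_LOADED) )     /* hypothetical flag */
    {
        context_save(v);
        vpmu_reset(vpmu, VPMU_LOADED);
    }
    /* Re-arm interception so a later guest access traps to Xen. */
    vpmu_msr_intercept_on(v);                 /* hypothetical helper */
}

/* Called from the rdmsr/wrmsr intercept path on the first PMU access
 * after the VCPU has been scheduled in. */
static void vpmu_lazy_load(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    context_restore(v);
    vpmu_set(vpmu, VPMU_LOADED);
    vpmu_msr_intercept_off(v);   /* hypothetical: fast path from now on */
}

[Guests that never touch the PMU would then pay nothing on context
switch, which is the point of George's suggestion.]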
Jan Beulich
2013-Mar-08 08:47 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
>>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> On Fri, Mar 1, 2013 at 8:49 PM, <suravee.suthikulpanit@amd.com> wrote:
>> [commit description trimmed]
>
> Many moons ago (circa 2005) when I used performance counters, I found
> that adding them to the save/restore path added a non-negligible
> overhead -- something like a 5% slow-down. Do you have any reason to
> believe this is no longer the case? Have you done any benchmarks
> before and after?
>
> If there is a performance slow-down, you may have to implement
> something like the "lazy FPU" save/restore, where you remove access to
> the VPMU MSRs to detect that the guest is accessing them.

Suravee,

without addressing George's concerns, I don't think you can
expect the patch to be committed (the more so that Boris, along
with his ack, also asked for the description to be adjusted).

Jan
Boris Ostrovsky
2013-Mar-08 14:50 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
----- JBeulich@suse.com wrote:

> >>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> > On Fri, Mar 1, 2013 at 8:49 PM, <suravee.suthikulpanit@amd.com> wrote:
> >> [commit description trimmed]
> >
> > Many moons ago (circa 2005) when I used performance counters, I found
> > that adding them to the save/restore path added a non-negligible
> > overhead -- something like a 5% slow-down. Do you have any reason to
> > believe this is no longer the case? Have you done any benchmarks
> > before and after?

I was doing some VPMU tracing a couple of weeks ago and, by looking at
trace timestamps, I think I saw about 4000 cycles on VPMU save and
~9000 cycles on restore. I don't remember what that was percentage-wise
of a whole context switch.

This was on Intel.

-boris

> > If there is a performance slow-down, you may have to implement
> > something like the "lazy FPU" save/restore, where you remove access to
> > the VPMU MSRs to detect that the guest is accessing them.
>
> Suravee,
>
> without addressing George's concerns, I don't think you can
> expect the patch to be committed (the more so that Boris, along
> with his ack, also asked for the description to be adjusted).
>
> Jan
George Dunlap
2013-Mar-08 14:56 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 08/03/13 14:50, Boris Ostrovsky wrote:
> ----- JBeulich@suse.com wrote:
>> [earlier quoting trimmed]
>
> I was doing some VPMU tracing a couple of weeks ago and, by looking at
> trace timestamps, I think I saw about 4000 cycles on VPMU save and
> ~9000 cycles on restore. I don't remember what that was percentage-wise
> of a whole context switch.
>
> This was on Intel.

That's a really hefty expense to make all users pay on every context
switch, on behalf of a random check in a piece of software that only a
handful of people are going to be actually using.

I'm having a hard time telling what PERF is being talked about here --
couldn't this check be fixed on their side, by perhaps checking the
CPUID leaf for the existence of Xen?

If not, I think a "lazy vpmu activation" is going to be the only option.

 -George
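[The CPUID-based detection George mentions is straightforward; below is
a minimal sketch assuming GCC's <cpuid.h>. The 0x40000000 hypervisor
leaf and the "XenVMMXenVMM" signature are real, but the surrounding
function is illustrative, not the actual perf code:]

#include <cpuid.h>    /* GCC's __cpuid() helper macro */
#include <stdint.h>
#include <string.h>

/* Returns 1 when running on Xen, based on the hypervisor CPUID leaf. */
static int running_on_xen(void)
{
    uint32_t eax, ebx, ecx, edx;
    char sig[13];

    __cpuid(0x40000000, eax, ebx, ecx, edx);  /* hypervisor vendor leaf */
    memcpy(sig + 0, &ebx, 4);
    memcpy(sig + 4, &ecx, 4);
    memcpy(sig + 8, &edx, 4);
    sig[12] = '\0';

    return strcmp(sig, "XenVMMXenVMM") == 0;
}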
Boris Ostrovsky
2013-Mar-08 15:11 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
----- george.dunlap@eu.citrix.com wrote:

> On 08/03/13 14:50, Boris Ostrovsky wrote:
>> [earlier quoting trimmed]
>
> That's a really hefty expense to make all users pay on every context
> switch, on behalf of a random check in a piece of software that only a
> handful of people are going to be actually using.

I believe Linux uses the perf infrastructure to implement the watchdog.

> I'm having a hard time telling what PERF is being talked about here --
> couldn't this check be fixed on their side, by perhaps checking the
> CPUID leaf for the existence of Xen?

If by "here" you refer to the problem that Suravee's patch is trying to
address, then I suspect it's this:

http://lxr.linux.no/#linux+v3.8.2/arch/x86/kernel/cpu/perf_event.c#L210

> If not, I think a "lazy vpmu activation" is going to be the only option.

Yes, I actually was going to look at that.

-boris
Jan Beulich
2013-Mar-08 15:15 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
>>> On 08.03.13 at 15:56, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> On 08/03/13 14:50, Boris Ostrovsky wrote:
>> [earlier quoting trimmed]
>>
>> This was on Intel.
>
> That's a really hefty expense to make all users pay on every context
> switch, on behalf of a random check in a piece of software that only a
> handful of people are going to be actually using.
>
> I'm having a hard time telling what PERF is being talked about here --
> couldn't this check be fixed on their side, by perhaps checking the
> CPUID leaf for the existence of Xen?
>
> If not, I think a "lazy vpmu activation" is going to be the only option.

Fully agree.

Jan
Suravee Suthikulpanit
2013-Mar-08 22:52 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 3/8/2013 2:47 AM, Jan Beulich wrote:
> [earlier quoting trimmed]
>
> Suravee,
>
> without addressing George's concerns, I don't think you can
> expect the patch to be committed (the more so that Boris, along
> with his ack, also asked for the description to be adjusted).
>
> Jan

I understand that we don't want to introduce this overhead. Let me look
into:

1. Measuring the overhead in this case.
2. The alternative approach (lazy save/restore),

and get back to you all.

Suravee
George Dunlap
2013-Mar-11 11:11 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 08/03/13 15:11, Boris Ostrovsky wrote:
> ----- george.dunlap@eu.citrix.com wrote:
>> [earlier quoting trimmed]
>>
>> That's a really hefty expense to make all users pay on every context
>> switch, on behalf of a random check in a piece of software that only a
>> handful of people are going to be actually using.
>
> I believe Linux uses the perf infrastructure to implement the watchdog.

Hmm -- well if it is the case that adding performance counters to the
vcpu context switch path will add a measurable overhead, then we
probably don't want them enabled for typical guests anyway. If people
are actually using the performance counters to measure performance,
that makes sense; but for watchdogs it seems like Xen should be able to
provide something that is useful for a watchdog without the extra
overhead of saving and restoring performance counters.

Konrad, any thoughts?

 -George
Konrad Rzeszutek Wilk
2013-Mar-11 14:53 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
> On 08/03/13 15:11, Boris Ostrovsky wrote:
> > [earlier quoting trimmed]
> >
> > I believe Linux uses the perf infrastructure to implement the watchdog.

And by default it won't work, as for Intel you need these flags:

cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603']

What we get right now when booting PVHVM under Intel is:

[    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
[    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled

unless said above CPUID flag is provided.

> Hmm -- well if it is the case that adding performance counters to the
> vcpu context switch path will add a measurable overhead, then we
> probably don't want them enabled for typical guests anyway. If people
> are actually using the performance counters to measure performance,
> that makes sense; but for watchdogs it seems like Xen should be able to
> provide something that is useful for a watchdog without the extra
> overhead of saving and restoring performance counters.
>
> Konrad, any thoughts?

The other thing is that there is a Xen watchdog, the one that Jan
Beulich wrote, which should also work under PVHVM:

drivers/watchdog/xen_wdt.c
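[For reference, that cpuid= override populates Intel's architectural
performance monitoring leaf, 0xA. The decode below is worked out from
the Intel SDM field layout; it is not stated in the thread:]

/* Decoding the cpuid leaf 0xA override above (fields per the Intel SDM):
 *
 *   eax = 0x07300403
 *     bits  7:0  = 0x03 -> architectural PMU version 3
 *     bits 15:8  = 0x04 -> 4 general-purpose counters per vCPU
 *     bits 23:16 = 0x30 -> 48-bit general-purpose counter width
 *     bits 31:24 = 0x07 -> 7 valid bits in the ebx event-availability mask
 *   ebx = 0x00000004
 *     bit 2 set  -> "reference cycles" event advertised as unavailable
 *   edx = 0x00000603
 *     bits  4:0  = 0x03 -> 3 fixed-function counters
 *     bits 12:5  = 0x30 -> 48-bit fixed-function counter width
 */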
George Dunlap
2013-Mar-11 14:59 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 11/03/13 14:53, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
>> [earlier quoting trimmed]
>
> And by default it won't work, as for Intel you need these flags:
>
> cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603']
>
> What we get right now when booting PVHVM under Intel is:
>
> [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled
>
> unless said above CPUID flag is provided.
>
> The other thing is that there is a Xen watchdog, the one that Jan
> Beulich wrote, which should also work under PVHVM:
>
> drivers/watchdog/xen_wdt.c

But my main question is: if the Linux perf system successfully detects
a vpmu, will it use the Xen watchdog, or will it try to use the vpmu?
Do we need to do anything to make sure that when running under Xen,
Linux will *not* try to use the vpmu for the watchdog?

 -George
Boris Ostrovsky
2013-Mar-11 15:54 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On 03/11/2013 10:59 AM, George Dunlap wrote:
> On 11/03/13 14:53, Konrad Rzeszutek Wilk wrote:
>> [earlier quoting trimmed]
>
> But my main question is: if the Linux perf system successfully detects
> a vpmu, will it use the Xen watchdog, or will it try to use the vpmu?
> Do we need to do anything to make sure that when running under Xen,
> Linux will *not* try to use the vpmu for the watchdog?

It looks to me that both watchdogs are running. Perf's counter 0 (which
is what the watchdog uses) is definitely enabled.

-boris
Jan Beulich
2013-Mar-11 16:03 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
>>> On 11.03.13 at 15:53, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
>> Konrad, any thoughts?
>
> The other thing is that there is a Xen watchdog, the one that Jan
> Beulich wrote, which should also work under PVHVM:
>
> drivers/watchdog/xen_wdt.c

But that's a different kind of watchdog. The perf one is there to check
that CPUs are alive, whereas this driver is a system (VM) wide one.

Jan
Dietmar Hahn
2013-Mar-12 08:18 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
On Monday, 11 March 2013 at 10:53:49, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
> > [earlier quoting trimmed]
>
> And by default it won't work, as for Intel you need these flags:
>
> cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603']

This cpuid config variable should not be needed if your CPU is supported
in vmx_vpmu_initialise(), where you added a lot of processors with your
patch. If it is not supported, you should see a message in the Xen logs.

> What we get right now when booting PVHVM under Intel is:
>
> [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled

Did you add vpmu to the Xen boot parameter list?

I installed openSUSE 12.2 as an HVM guest with xen-unstable running, and
the kernel log says:

Mar  7 15:06:18 linux kernel: [    0.183217] CPU0: Intel(R) Core(TM)2 Duo CPU P8800 @ 2.66GHz stepping 0a
Mar  7 15:06:18 linux kernel: [    0.183980] Performance Events: 4-deep LBR, Core2 events, Intel PMU driver.
Mar  7 15:06:18 linux kernel: [    0.189994] ... version:                2
Mar  7 15:06:18 linux kernel: [    0.189997] ... bit width:              40
Mar  7 15:06:18 linux kernel: [    0.190000] ... generic registers:      2
Mar  7 15:06:18 linux kernel: [    0.190002] ... value mask:             000000ffffffffff
Mar  7 15:06:18 linux kernel: [    0.190005] ... max period:             000000007fffffff
Mar  7 15:06:18 linux kernel: [    0.190008] ... fixed-purpose events:   3
Mar  7 15:06:18 linux kernel: [    0.190011] ... event mask:             0000000700000003
Mar  7 15:06:18 linux kernel: [    0.198203] NMI watchdog: enabled, takes one hw-pmu counter.

When I call perf:

# perf stat ls
[file listing trimmed]

 Performance counter stats for 'ls':

          7.840869 task-clock                #    0.590 CPUs utilized
                59 context-switches          #    0.008 M/sec
                 0 CPU-migrations            #    0.000 K/sec
               304 page-faults               #    0.039 M/sec
         6,583,834 cycles                    #    0.840 GHz             [40.38%]
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
         2,168,931 instructions              #    0.33  insns per cycle [73.20%]
           525,628 branches                  #   67.037 M/sec           [79.06%]
            27,138 branch-misses             #    5.16% of all branches [83.55%]

       0.013283672 seconds time elapsed

As you can see, performance counters are working for instructions,
branches and branch-misses.

When I call this command in dom0, it's a bit different:

# perf stat ls
[file listing trimmed]

 Performance counter stats for 'ls':

          6.959326 task-clock                #    0.714 CPUs utilized
                11 context-switches          #    0.002 M/sec
                 0 CPU-migrations            #    0.000 K/sec
               304 page-faults               #    0.044 M/sec
   <not supported> cycles
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
   <not supported> instructions
   <not supported> branches
   <not supported> branch-misses

       0.009746152 seconds time elapsed

This is because the hardware events are not supported in PV.

Dietmar.

-- 
Company details: http://ts.fujitsu.com/imprint.html
Konrad Rzeszutek Wilk
2013-Mar-12 15:12 UTC
Re: [PATCH] Always save/restore performance counters when an HVM guest switches VCPUs
> > > > > This was on Intel.
> > > > That's a really hefty expense to make all users pay on every context
> > > > switch, on behalf of a random check in a piece of software that only
> > > > a handful of people are going to be actually using.
> > > I believe Linux uses the perf infrastructure to implement the watchdog.

And as mentioned here, this never gets enabled by default b/c to use the
perf infrastructure you need 'vpmu=1' on the bootup line.

> > And by default it won't work, as for Intel you need these flags:
> >
> > cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603']
>
> This cpuid config variable should not be needed if your CPU is supported
> in vmx_vpmu_initialise(), where you added a lot of processors with your
> patch. If it is not supported, you should see a message in the Xen logs.

Found out that that is b/c I was using 'xend'. With that there are some
CPUID flags cleared, while 'xl' does not do it.

> > What we get right now when booting PVHVM under Intel is:
> >
> > [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> > [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled
>
> Did you add vpmu to the Xen boot parameter list?

Yes.

> I installed openSUSE 12.2 as an HVM guest with xen-unstable running, and
> the kernel log says:

With Xen 4.3 and using 'xl' I get it working too. Albeit on that
particular box (model 45) I am hitting some weird crashes, while on a
lesser SandyBridge (model 44) it works OK. Anyhow, that is a different
thread.

> [kernel log and perf output trimmed]
>
> As you can see, performance counters are working for instructions,
> branches and branch-misses.
>
> [dom0 perf output trimmed]
>
> This is because the hardware events are not supported in PV.

Right.