Dan Magenheimer
2009-Sep-27 19:22 UTC
[Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
(I''m still struggling against the coils of python on the tools part of this patch, so thought I''d post the hypervisor side separately first.) Switches rdtsc emulation from boot option to per-domain and enables it by default. Also removes hvm tsc scaling as it is no longer necessary. Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> diff -r 72d130772f36 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/domain.c Sun Sep 27 13:08:12 2009 -0600 @@ -494,6 +494,10 @@ int arch_domain_create(struct domain *d, mce_init_msr(d); } + if ( d->domain_id == 0 ) + d->arch.tsc_native = 1; + spin_lock_init(&d->arch.vtsc_lock); + if ( is_hvm_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) @@ -503,11 +507,9 @@ int arch_domain_create(struct domain *d, } } else - { /* 32-bit PV guest by default only if Xen is not 64-bit. */ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo (CONFIG_PAGING_LEVELS != 4); - } memset(d->arch.cpuids, 0, sizeof(d->arch.cpuids)); for ( i = 0; i < MAX_CPUID_INPUT; i++ ) @@ -515,10 +517,6 @@ int arch_domain_create(struct domain *d, d->arch.cpuids[i].input[0] = XEN_CPUID_INPUT_UNUSED; d->arch.cpuids[i].input[1] = XEN_CPUID_INPUT_UNUSED; } - - /* For now, per-domain SoftTSC status is taken from global boot param. */ - d->arch.vtsc = opt_softtsc; - spin_lock_init(&d->arch.vtsc_lock); return 0; diff -r 72d130772f36 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/domctl.c Sun Sep 27 13:08:12 2009 -0600 @@ -1060,6 +1060,20 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_set_tsc_native: + { + struct domain *d; + + ret = -ESRCH; + d = rcu_lock_domain_by_id(domctl->domain); + if ( d == NULL ) + break; + d->arch.tsc_native = 1; + rcu_unlock_domain(d); + ret = 0; + } + break; + case XEN_DOMCTL_suppress_spurious_page_faults: { struct domain *d; diff -r 72d130772f36 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/hvm/hvm.c Sun Sep 27 13:08:12 2009 -0600 @@ -146,63 +146,39 @@ void hvm_enable_rdtsc_exiting(struct dom { struct vcpu *v; - if ( opt_softtsc || !hvm_funcs.enable_rdtsc_exiting ) + if ( !hvm_funcs.enable_rdtsc_exiting ) return; for_each_vcpu ( d, v ) hvm_funcs.enable_rdtsc_exiting(v); } -int hvm_gtsc_need_scale(struct domain *d) -{ - uint32_t gtsc_mhz, htsc_mhz; - - gtsc_mhz = d->arch.hvm_domain.gtsc_khz / 1000; - htsc_mhz = opt_softtsc ? 1000 : ((uint32_t)cpu_khz / 1000); - - d->arch.hvm_domain.tsc_scaled = (gtsc_mhz && (gtsc_mhz != htsc_mhz)); - return d->arch.hvm_domain.tsc_scaled; -} - -static u64 hvm_h2g_scale_tsc(struct vcpu *v, u64 host_tsc) -{ - uint32_t gtsc_khz, htsc_khz; - - if ( !v->domain->arch.hvm_domain.tsc_scaled ) - return host_tsc; - - htsc_khz = opt_softtsc ? 1000000 : cpu_khz; - gtsc_khz = v->domain->arch.hvm_domain.gtsc_khz; - return muldiv64(host_tsc, gtsc_khz, htsc_khz); -} - void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc) { - uint64_t host_tsc, scaled_htsc; + uint64_t host_tsc; - if ( opt_softtsc ) + if ( v->domain->arch.tsc_native ) + rdtscll(host_tsc); + else host_tsc = hvm_get_guest_time(v); - else - rdtscll(host_tsc); - scaled_htsc = hvm_h2g_scale_tsc(v, host_tsc); - - v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - scaled_htsc; + v->arch.hvm_vcpu.cache_tsc_offset = guest_tsc - host_tsc; hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset); } u64 hvm_get_guest_tsc(struct vcpu *v) { - uint64_t host_tsc, scaled_htsc; + uint64_t tsc; - if ( opt_softtsc ) - host_tsc = hvm_get_guest_time(v); + if ( v->domain->arch.tsc_native ) + rdtscll(tsc); else - rdtscll(host_tsc); + { + v->domain->arch.vtsc_kerncount++; + tsc = hvm_get_guest_time(v); + } - scaled_htsc = hvm_h2g_scale_tsc(v, host_tsc); - - return scaled_htsc + v->arch.hvm_vcpu.cache_tsc_offset; + return tsc + v->arch.hvm_vcpu.cache_tsc_offset; } void hvm_migrate_timers(struct vcpu *v) diff -r 72d130772f36 xen/arch/x86/hvm/save.c --- a/xen/arch/x86/hvm/save.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/hvm/save.c Sun Sep 27 13:08:12 2009 -0600 @@ -31,9 +31,6 @@ void arch_hvm_save(struct domain *d, str /* Save some CPUID bits */ cpuid(1, &eax, &ebx, &ecx, &edx); hdr->cpuid = eax; - - /* Save guest''s preferred TSC. */ - hdr->gtsc_khz = d->arch.hvm_domain.gtsc_khz; } int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr) @@ -60,17 +57,8 @@ int arch_hvm_load(struct domain *d, stru gdprintk(XENLOG_WARNING, "HVM restore: saved CPUID (%#"PRIx32") " "does not match host (%#"PRIx32").\n", hdr->cpuid, eax); - /* Restore guest''s preferred TSC frequency. */ - d->arch.hvm_domain.gtsc_khz = hdr->gtsc_khz; - - if ( hdr->gtsc_khz && hvm_gtsc_need_scale(d) ) - { + if ( !d->arch.tsc_native ) hvm_enable_rdtsc_exiting(d); - gdprintk(XENLOG_WARNING, "Domain %d expects freq %uMHz " - "but host''s freq is %luMHz: trap and emulate rdtsc\n", - d->domain_id, hdr->gtsc_khz / 1000, opt_softtsc ? 1000ul : - cpu_khz / 1000); - } /* VGA state is not saved/restored, so we nobble the cache. */ d->arch.hvm_domain.stdvga.cache = 0; diff -r 72d130772f36 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/hvm/svm/vmcb.c Sun Sep 27 13:08:12 2009 -0600 @@ -167,7 +167,7 @@ static int construct_vmcb(struct vcpu *v /* TSC. */ vmcb->tsc_offset = 0; - if ( opt_softtsc ) + if ( !v->domain->arch.tsc_native ) vmcb->general1_intercepts |= GENERAL1_INTERCEPT_RDTSC; /* Guest EFER. */ diff -r 72d130772f36 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Sun Sep 27 13:08:12 2009 -0600 @@ -133,7 +133,7 @@ static void vmx_init_vmcs_config(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_ACTIVATE_IO_BITMAP | CPU_BASED_USE_TSC_OFFSETING | - (opt_softtsc ? CPU_BASED_RDTSC_EXITING : 0)); + CPU_BASED_RDTSC_EXITING ); opt = (CPU_BASED_ACTIVATE_MSR_BITMAP | CPU_BASED_TPR_SHADOW | CPU_BASED_MONITOR_TRAP_FLAG | diff -r 72d130772f36 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/hvm/vpt.c Sun Sep 27 13:08:12 2009 -0600 @@ -32,9 +32,6 @@ void hvm_init_guest_time(struct domain * spin_lock_init(&pl->pl_time_lock); pl->stime_offset = -(u64)get_s_time(); pl->last_guest_time = 0; - - d->arch.hvm_domain.gtsc_khz = opt_softtsc ? 1000000 : cpu_khz; - d->arch.hvm_domain.tsc_scaled = 0; } u64 hvm_get_guest_time(struct vcpu *v) diff -r 72d130772f36 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/time.c Sun Sep 27 13:08:12 2009 -0600 @@ -35,9 +35,6 @@ /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */ static char __initdata opt_clocksource[10]; string_param("clocksource", opt_clocksource); - -int opt_softtsc; -boolean_param("softtsc", opt_softtsc); /* * opt_consistent_tscs: All TSCs tick at the exact same rate, allowing @@ -1438,20 +1435,18 @@ struct tm wallclock_time(void) * PV SoftTSC Emulation. */ -static unsigned long rdtsc_kerncount, rdtsc_usercount; - void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs) { s_time_t now; if ( guest_kernel_mode(v, regs) ) { - rdtsc_kerncount++; + v->domain->arch.vtsc_kerncount++; rdtsc(regs->eax, regs->edx); } else { - rdtsc_usercount++; + v->domain->arch.vtsc_kerncount++; spin_lock(&v->domain->arch.vtsc_lock); now = get_s_time() + v->domain->arch.vtsc_stime_offset; if ( (int64_t)(now - v->domain->arch.vtsc_last) > 0 ) @@ -1464,10 +1459,27 @@ void pv_soft_rdtsc(struct vcpu *v, struc } } +/* vtsc may incur measurable performance degradation, diagnose with this */ static void dump_softtsc(unsigned char key) { - printk("softtsc count: %lu kernel, %lu user\n", - rdtsc_kerncount, rdtsc_usercount); + struct domain *d; + int domcnt = 0; + + for_each_domain ( d ) + { + if ( !d->arch.tsc_native ) + { + if ( is_hvm_domain(d) ) + printk("dom%u (hvm) vtsc count: %"PRIu64" total\n", + d->domain_id, d->arch.vtsc_kerncount); + else + printk("dom%u vtsc count: %"PRIu64" kernel, %"PRIu64" user\n", + d->domain_id, d->arch.vtsc_kerncount, d->arch.vtsc_usercount); + domcnt++; + } + } + if ( !domcnt ) + printk("all domains have tsc_native enabled\n"); } static struct keyhandler dump_softtsc_keyhandler = { @@ -1478,8 +1490,7 @@ static struct keyhandler dump_softtsc_ke static int __init setup_dump_softtsc(void) { - if ( opt_softtsc ) - register_keyhandler(''s'', &dump_softtsc_keyhandler); + register_keyhandler(''s'', &dump_softtsc_keyhandler); return 0; } __initcall(setup_dump_softtsc); diff -r 72d130772f36 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/arch/x86/traps.c Sun Sep 27 13:08:12 2009 -0600 @@ -2010,7 +2010,7 @@ static int emulate_privileged_op(struct */ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( !guest_kernel_mode(v, regs) && - !((opcode == 0x31) && v->domain->arch.vtsc) ) + !((opcode == 0x31) && !v->domain->arch.tsc_native) ) goto fail; if ( lock && (opcode & ~3) != 0x20 ) diff -r 72d130772f36 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/asm-x86/domain.h Sun Sep 27 13:08:12 2009 -0600 @@ -301,9 +301,11 @@ struct arch_domain struct domain_mca_msrs vmca_msrs; /* SoftTSC emulation */ - bool_t vtsc; + bool_t tsc_native; /* true means vtsc is disabled */ s_time_t vtsc_last; spinlock_t vtsc_lock; + uint64_t vtsc_kerncount; /* for hvm, counts all vtsc */ + uint64_t vtsc_usercount; /* not used for hvm */ int64_t vtsc_stime_offset; } __cacheline_aligned; @@ -435,7 +437,7 @@ unsigned long pv_guest_cr4_fixup(unsigne #define pv_guest_cr4_to_real_cr4(v) \ (((v)->arch.guest_context.ctrlreg[4] \ | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE)) \ - | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \ + | ((v)->domain->arch.tsc_native ? 0: X86_CR4_TSD)) \ & ~X86_CR4_DE) #define real_cr4_to_pv_guest_cr4(c) \ ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD)) diff -r 72d130772f36 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/asm-x86/hvm/domain.h Sun Sep 27 13:08:12 2009 -0600 @@ -45,8 +45,6 @@ struct hvm_domain { struct hvm_ioreq_page ioreq; struct hvm_ioreq_page buf_ioreq; - uint32_t gtsc_khz; /* kHz */ - bool_t tsc_scaled; struct pl_time pl_time; struct hvm_io_handler io_handler; diff -r 72d130772f36 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/asm-x86/hvm/hvm.h Sun Sep 27 13:08:12 2009 -0600 @@ -286,7 +286,6 @@ uint8_t hvm_combine_hw_exceptions(uint8_ uint8_t hvm_combine_hw_exceptions(uint8_t vec1, uint8_t vec2); void hvm_enable_rdtsc_exiting(struct domain *d); -int hvm_gtsc_need_scale(struct domain *d); static inline int hvm_cpu_up(void) { diff -r 72d130772f36 xen/include/asm-x86/time.h --- a/xen/include/asm-x86/time.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/asm-x86/time.h Sun Sep 27 13:08:12 2009 -0600 @@ -41,7 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic uint64_t acpi_pm_tick_to_ns(uint64_t ticks); uint64_t ns_to_acpi_pm_tick(uint64_t ns); -extern int opt_softtsc; void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs); #endif /* __X86_TIME_H__ */ diff -r 72d130772f36 xen/include/public/arch-x86/hvm/save.h --- a/xen/include/public/arch-x86/hvm/save.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/public/arch-x86/hvm/save.h Sun Sep 27 13:08:12 2009 -0600 @@ -38,7 +38,6 @@ struct hvm_save_header { uint32_t version; /* File format version */ uint64_t changeset; /* Version of Xen that saved this file */ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ - uint32_t gtsc_khz; /* Guest''s TSC frequency in kHz */ }; DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); diff -r 72d130772f36 xen/include/public/domctl.h --- a/xen/include/public/domctl.h Wed Sep 16 09:30:41 2009 +0100 +++ b/xen/include/public/domctl.h Sun Sep 27 13:08:12 2009 -0600 @@ -400,7 +400,8 @@ struct xen_domctl_settimeoffset { }; typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); - +/* allow rdtsc to be natively executed (is trapped/emulated by default */ +#define XEN_DOMCTL_set_tsc_native 57 #define XEN_DOMCTL_gethvmcontext 33 #define XEN_DOMCTL_sethvmcontext 34 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-27 21:39 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 27/09/2009 20:22, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> (I''m still struggling against the coils of python on > the tools part of this patch, so thought I''d post the > hypervisor side separately first.) > > Switches rdtsc emulation from boot option to per-domain > and enables it by default. Also removes hvm tsc scaling > as it is no longer necessary.The hvm tsc scaling will still be useful in the case that trap-&-emulate was originally disabled for an hvm domain. So I''d keep it, and also keep the flag called d->arch.vtsc rather than flipping it and renaming it, and the patch will end up about 20 lines long. I''ll rework this patch myself and check it in at the same time as the tools patch, when you have that ready. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-27 23:19 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Sorry I should have explained my logic for those changes in the original posting. The scaling is a strange corner case where a domain can invisibly change from native-tsc to virtual-tsc. I started to add a domctl so that the current state could be probed, but decided that the scaling is really a policy decision that is best made at config time and best retained in the VM information carried along with a save file (or live migration). Allowing rdtsc to change dynamically according to whether a save/restore/migration happened to occur and whether the restored VM happened to land on a machine with a different clock rate seems like it could get very confusing. The domain adminstrator either assumes correctness (the default) or turns off emulation to get better performance and deals with ALL the consequences. So IMHO removing the scaling simplifies an already complex-to-explain issue. As for flipping/renaming the flag, I coded the patch using the original vtsc name, but decided that if the hypervisor default is truly going to be tsc_emulation, then tsc_native is the unusual case and the code should reflect that syntactically by requiring tsc_native to be enabled. This is especially true when the equivalent of the flag is managed in the config file: Nearly all config parameters use a default as 0 and setting the parameter causes behavior to change, so setting tsc_native=1 to change from the default tsc behavior seemed more consistent. Let me know your decisions on these as the tools code depends on them. Thanks, Dan> -----Original Message----- > From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > Sent: Sunday, September 27, 2009 3:39 PM > To: Dan Magenheimer; Xen-Devel (E-mail) > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > On 27/09/2009 20:22, "Dan Magenheimer" > <dan.magenheimer@oracle.com> wrote: > > > (I''m still struggling against the coils of python on > > the tools part of this patch, so thought I''d post the > > hypervisor side separately first.) > > > > Switches rdtsc emulation from boot option to per-domain > > and enables it by default. Also removes hvm tsc scaling > > as it is no longer necessary. > > The hvm tsc scaling will still be useful in the case that > trap-&-emulate was > originally disabled for an hvm domain. So I''d keep it, and > also keep the > flag called d->arch.vtsc rather than flipping it and renaming > it, and the > patch will end up about 20 lines long. > > I''ll rework this patch myself and check it in at the same > time as the tools > patch, when you have that ready. > > -- Keir > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-28 07:25 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 28/09/2009 00:19, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> Let me know your decisions on these as the tools code > depends on them.I''ll knock together a patch for both hypervisor and tools this morning. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Zhang, Xiantao
2009-Sep-28 08:22 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Dan Magenheimer wrote:> Sorry I should have explained my logic for those changes > in the original posting. > > The scaling is a strange corner case where a domain > can invisibly change from native-tsc to virtual-tsc. > I started to add a domctl so that the current state > could be probed, but decided that the scaling is > really a policy decision that is best made at > config time and best retained in the VM information > carried along with a save file (or live migration). > Allowing rdtsc to change dynamically according to > whether a save/restore/migration happened to occur > and whether the restored VM happened to land on > a machine with a different clock rate seems like > it could get very confusing. The domain adminstrator > either assumes correctness (the default) or turns > off emulation to get better performance and deals > with ALL the consequences. So IMHO removing the > scaling simplifies an already complex-to-explain > issue.Hi, Dan I didn''t see the necessity to remove the scaling logic. This logic just allows the domain can run in different TSC-freq platforms without strange timing behaviors. If the domain is migrated between tow machines with different TSC rate, system administrator can get the tsc emulation knowledge from Xen''s system log, and if administrator doesn''t like to suffer performance loss in emulation way, he can migrate the guest back. Besides, I don''t think this logic conflicts with your boot-stage option.IMO, if the boot-stage option is set to use emulation, the scaling policy can be switched off, but if the option is not specified, I think the default scaling policy should be adopted in migration case. Xiantao> >> -----Original Message----- >> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] >> Sent: Sunday, September 27, 2009 3:39 PM >> To: Dan Magenheimer; Xen-Devel (E-mail) >> Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native >> xen boot option with per-domain (hypervisor part) >> >> >> On 27/09/2009 20:22, "Dan Magenheimer" >> <dan.magenheimer@oracle.com> wrote: >> >>> (I''m still struggling against the coils of python on >>> the tools part of this patch, so thought I''d post the >>> hypervisor side separately first.) >>> >>> Switches rdtsc emulation from boot option to per-domain >>> and enables it by default. Also removes hvm tsc scaling >>> as it is no longer necessary. >> >> The hvm tsc scaling will still be useful in the case that >> trap-&-emulate was originally disabled for an hvm domain. So I''d >> keep it, and >> also keep the >> flag called d->arch.vtsc rather than flipping it and renaming >> it, and the >> patch will end up about 20 lines long. >> >> I''ll rework this patch myself and check it in at the same >> time as the tools >> patch, when you have that ready. >> >> -- Keir >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xensource.com >> http://lists.xensource.com/xen-devel >> > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-28 09:04 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 28/09/2009 08:25, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:> On 28/09/2009 00:19, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote: > >> Let me know your decisions on these as the tools code >> depends on them. > > I''ll knock together a patch for both hypervisor and tools this morning.Changeset 20257. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 15:13 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> if administrator doesn''t > like to suffer performance loss in emulation way, he can > migrate the guest backOK, that makes sense.> system administrator can get the tsc emulation > knowledge from Xen''s system logGetting information from the log is probably not a good longterm solution though. I think there needs to be a management/tools mechanism to query if a migrated/restored domain has transitioned from tsc-native to tsc-emulated or vice versa. Thanks, Dan> -----Original Message----- > From: Zhang, Xiantao [mailto:xiantao.zhang@intel.com] > Sent: Monday, September 28, 2009 2:23 AM > To: Dan Magenheimer; Keir Fraser; Xen-Devel (E-mail) > Subject: RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > Dan Magenheimer wrote: > > Sorry I should have explained my logic for those changes > > in the original posting. > > > > The scaling is a strange corner case where a domain > > can invisibly change from native-tsc to virtual-tsc. > > I started to add a domctl so that the current state > > could be probed, but decided that the scaling is > > really a policy decision that is best made at > > config time and best retained in the VM information > > carried along with a save file (or live migration). > > Allowing rdtsc to change dynamically according to > > whether a save/restore/migration happened to occur > > and whether the restored VM happened to land on > > a machine with a different clock rate seems like > > it could get very confusing. The domain adminstrator > > either assumes correctness (the default) or turns > > off emulation to get better performance and deals > > with ALL the consequences. So IMHO removing the > > scaling simplifies an already complex-to-explain > > issue. > Hi, Dan > I didn''t see the necessity to remove the scaling logic. > This logic just allows the domain can run in different > TSC-freq platforms without strange timing behaviors. If the > domain is migrated between tow machines with different TSC > rate, system administrator can get the tsc emulation > knowledge from Xen''s system log, and if administrator doesn''t > like to suffer performance loss in emulation way, he can > migrate the guest back. Besides, I don''t think this logic > conflicts with your boot-stage option.IMO, if the boot-stage > option is set to use emulation, the scaling policy can be > switched off, but if the option is not specified, I think the > default scaling policy should be adopted in migration case. > Xiantao > > > > >> -----Original Message----- > >> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > >> Sent: Sunday, September 27, 2009 3:39 PM > >> To: Dan Magenheimer; Xen-Devel (E-mail) > >> Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native > >> xen boot option with per-domain (hypervisor part) > >> > >> > >> On 27/09/2009 20:22, "Dan Magenheimer" > >> <dan.magenheimer@oracle.com> wrote: > >> > >>> (I''m still struggling against the coils of python on > >>> the tools part of this patch, so thought I''d post the > >>> hypervisor side separately first.) > >>> > >>> Switches rdtsc emulation from boot option to per-domain > >>> and enables it by default. Also removes hvm tsc scaling > >>> as it is no longer necessary. > >> > >> The hvm tsc scaling will still be useful in the case that > >> trap-&-emulate was originally disabled for an hvm domain. So I''d > >> keep it, and > >> also keep the > >> flag called d->arch.vtsc rather than flipping it and renaming > >> it, and the > >> patch will end up about 20 lines long. > >> > >> I''ll rework this patch myself and check it in at the same > >> time as the tools > >> patch, when you have that ready. > >> > >> -- Keir > >> > >> > >> > >> _______________________________________________ > >> Xen-devel mailing list > >> Xen-devel@lists.xensource.com > >> http://lists.xensource.com/xen-devel > >> > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xensource.com > > http://lists.xensource.com/xen-devel > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 15:48 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
(sorry for the delay... qemu git update takes forever!) Hmmm.... I''m getting an Error 38, Function not implemented, on XendDomainInfo.py line 2429 (xc.domain_set_tsc_native). Got this for xm create for both PV and HVM. Perhaps there''s something strange in my environment or maybe a python version diff (I''m using 2.4.3).> -----Original Message----- > From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > Sent: Monday, September 28, 2009 3:04 AM > To: Dan Magenheimer; Xen-Devel (E-mail) > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > On 28/09/2009 08:25, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote: > > > On 28/09/2009 00:19, "Dan Magenheimer" > <dan.magenheimer@oracle.com> wrote: > > > >> Let me know your decisions on these as the tools code > >> depends on them. > > > > I''ll knock together a patch for both hypervisor and tools > this morning. > > Changeset 20257. > > -- Keir > > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-28 16:07 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Have you really definitely reinstalled the C extension package xc.so? I have tested the changeset and it does work for me. -- Keir On 28/09/2009 16:48, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> (sorry for the delay... qemu git update takes forever!) > > Hmmm.... I''m getting an Error 38, Function not implemented, > on XendDomainInfo.py line 2429 (xc.domain_set_tsc_native). > Got this for xm create for both PV and HVM. > Perhaps there''s something strange in my environment or > maybe a python version diff (I''m using 2.4.3). > >> -----Original Message----- >> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] >> Sent: Monday, September 28, 2009 3:04 AM >> To: Dan Magenheimer; Xen-Devel (E-mail) >> Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen >> boot option with per-domain (hypervisor part) >> >> >> On 28/09/2009 08:25, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote: >> >>> On 28/09/2009 00:19, "Dan Magenheimer" >> <dan.magenheimer@oracle.com> wrote: >>> >>>> Let me know your decisions on these as the tools code >>>> depends on them. >>> >>> I''ll knock together a patch for both hypervisor and tools >> this morning. >> >> Changeset 20257. >> >> -- Keir >> >> >>_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 16:24 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
/usr/lib/python2.4/site-packages/lowlevel/xc.so is definitely newly built and this was on a fresh clone. However, I did only a "make install-tools", not a complete "make" so maybe that resulted in something screwy. I''ll try again with another fresh clone and full build.> -----Original Message----- > From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > Sent: Monday, September 28, 2009 10:08 AM > To: Dan Magenheimer; Xen-Devel (E-mail) > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > Have you really definitely reinstalled the C extension > package xc.so? I have > tested the changeset and it does work for me. > > -- Keir > > On 28/09/2009 16:48, "Dan Magenheimer" > <dan.magenheimer@oracle.com> wrote: > > > (sorry for the delay... qemu git update takes forever!) > > > > Hmmm.... I''m getting an Error 38, Function not implemented, > > on XendDomainInfo.py line 2429 (xc.domain_set_tsc_native). > > Got this for xm create for both PV and HVM. > > Perhaps there''s something strange in my environment or > > maybe a python version diff (I''m using 2.4.3). > > > >> -----Original Message----- > >> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > >> Sent: Monday, September 28, 2009 3:04 AM > >> To: Dan Magenheimer; Xen-Devel (E-mail) > >> Subject: Re: [Xen-devel] [PATCH] replace rdtsc > emulation-vs-native xen > >> boot option with per-domain (hypervisor part) > >> > >> > >> On 28/09/2009 08:25, "Keir Fraser" > <keir.fraser@eu.citrix.com> wrote: > >> > >>> On 28/09/2009 00:19, "Dan Magenheimer" > >> <dan.magenheimer@oracle.com> wrote: > >>> > >>>> Let me know your decisions on these as the tools code > >>>> depends on them. > >>> > >>> I''ll knock together a patch for both hypervisor and tools > >> this morning. > >> > >> Changeset 20257. > >> > >> -- Keir > >> > >> > >> > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 19:44 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Rebuild from scratch didn''t help. Oddly I accidentally ran a 32-bit hypervisor and didn''t see the problem. I''m seeing the problem on a 32-bit dom0 with 64-bit hypervisor. Are you testing on a 64-bit dom0? (e.g. maybe a conpat mode problem?) Continuing to look into it. Dan> -----Original Message----- > From: Dan Magenheimer > Sent: Monday, September 28, 2009 10:24 AM > To: Keir Fraser; Xen-Devel (E-mail) > Subject: RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > /usr/lib/python2.4/site-packages/lowlevel/xc.so is > definitely newly built and this was on a fresh clone. > However, I did only a "make install-tools", not a > complete "make" so maybe that resulted in something > screwy. I''ll try again with another fresh clone > and full build. > > > -----Original Message----- > > From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > > Sent: Monday, September 28, 2009 10:08 AM > > To: Dan Magenheimer; Xen-Devel (E-mail) > > Subject: Re: [Xen-devel] [PATCH] replace rdtsc > emulation-vs-native xen > > boot option with per-domain (hypervisor part) > > > > > > Have you really definitely reinstalled the C extension > > package xc.so? I have > > tested the changeset and it does work for me. > > > > -- Keir > > > > On 28/09/2009 16:48, "Dan Magenheimer" > > <dan.magenheimer@oracle.com> wrote: > > > > > (sorry for the delay... qemu git update takes forever!) > > > > > > Hmmm.... I''m getting an Error 38, Function not implemented, > > > on XendDomainInfo.py line 2429 (xc.domain_set_tsc_native). > > > Got this for xm create for both PV and HVM. > > > Perhaps there''s something strange in my environment or > > > maybe a python version diff (I''m using 2.4.3). > > > > > >> -----Original Message----- > > >> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > > >> Sent: Monday, September 28, 2009 3:04 AM > > >> To: Dan Magenheimer; Xen-Devel (E-mail) > > >> Subject: Re: [Xen-devel] [PATCH] replace rdtsc > > emulation-vs-native xen > > >> boot option with per-domain (hypervisor part) > > >> > > >> > > >> On 28/09/2009 08:25, "Keir Fraser" > > <keir.fraser@eu.citrix.com> wrote: > > >> > > >>> On 28/09/2009 00:19, "Dan Magenheimer" > > >> <dan.magenheimer@oracle.com> wrote: > > >>> > > >>>> Let me know your decisions on these as the tools code > > >>>> depends on them. > > >>> > > >>> I''ll knock together a patch for both hypervisor and tools > > >> this morning. > > >> > > >> Changeset 20257. > > >> > > >> -- Keir > > >> > > >> > > >> > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xensource.com > > http://lists.xensource.com/xen-devel > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-28 20:55 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 28/09/2009 20:44, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> Rebuild from scratch didn''t help. Oddly I > accidentally ran a 32-bit hypervisor and didn''t > see the problem. I''m seeing the problem on a > 32-bit dom0 with 64-bit hypervisor. Are you > testing on a 64-bit dom0? (e.g. maybe a conpat > mode problem?)I tested 64-bit dom0 on 64-bit hypervisor. Compat mode problems shouldn''t be an issue since domctls do not have a compat shim. Our automated tests also check 32-bit dom0 on 32-bit Xen and on 64-bit Xen, and they all passed fine too. -- Keir> Continuing to look into it._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-28 22:24 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/27/09 12:22, Dan Magenheimer wrote:> Switches rdtsc emulation from boot option to per-domain > and enables it by default.Is this just on hvm domains, or PV ones too? J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 22:43 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
PV ones too. An app doesn''t have any idea (and doesn''t and shouldn''t care) if it is running on a PV or HVM domain.> -----Original Message----- > From: Jeremy Fitzhardinge [mailto:jeremy@goop.org] > Sent: Monday, September 28, 2009 4:24 PM > To: Dan Magenheimer > Cc: Xen-Devel (E-mail) > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > On 09/27/09 12:22, Dan Magenheimer wrote: > > Switches rdtsc emulation from boot option to per-domain > > and enables it by default. > > Is this just on hvm domains, or PV ones too? > > J >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-28 23:10 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/28/09 15:43, Dan Magenheimer wrote:> PV ones too. An app doesn''t have any idea (and doesn''t > and shouldn''t care) if it is running on a PV or HVM domain. >Well, it shouldn''t be enabled by default. That slows down all rdtsc operations for the benefit of very niche applications. The Xen clocksource assumes that rdtsc is fast, unemulated and in need of correction. If someone really needs an artificial tsc, then they can enable the option for themselves. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-28 23:36 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> On 09/28/09 15:43, Dan Magenheimer wrote: > > PV ones too. An app doesn''t have any idea (and doesn''t > > and shouldn''t care) if it is running on a PV or HVM domain. > > Well, it shouldn''t be enabled by default. That slows down all rdtsc > operations for the benefit of very niche applications. The Xen > clocksource assumes that rdtsc is fast, unemulated and in need of > correction. > > If someone really needs an artificial tsc, then they can enable the > option for themselves.While I understand and sympathize (and the idealistic side of me even agrees with) your position, let me argue your point "they can enable the option for themselves" by mangling a famous movie quote: "In a cloud, no-one can hear rdtsc scream" _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Pratt
2009-Sep-29 00:27 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> > Well, it shouldn''t be enabled by default. That slows down all rdtsc > > operations for the benefit of very niche applications. The Xen > > clocksource assumes that rdtsc is fast, unemulated and in need of > > correction. > > > > If someone really needs an artificial tsc, then they can enable the > > option for themselves. > > While I understand and sympathize (and the idealistic side of me even > agrees with) your position, let me argue your point "they can enable > the option for themselves" by mangling a famous movie quote: > > "In a cloud, no-one can hear rdtsc scream"If we had some actual bug reports detailing failures of real-world applications then the decision would be easy. I realise it could be hard to pin strange application behaviour down to TSC issues, but I can''t readily spot any bugs on the XenServer or xen.org bug trackers that might even be candidates. Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Zhang, Xiantao
2009-Sep-29 01:42 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Ian Pratt wrote:>>> Well, it shouldn''t be enabled by default. That slows down all rdtsc >>> operations for the benefit of very niche applications. The Xen >>> clocksource assumes that rdtsc is fast, unemulated and in need of >>> correction. >>> >>> If someone really needs an artificial tsc, then they can enable the >>> option for themselves. >> >> While I understand and sympathize (and the idealistic side of me even >> agrees with) your position, let me argue your point "they can enable >> the option for themselves" by mangling a famous movie quote: >> >> "In a cloud, no-one can hear rdtsc scream" > > If we had some actual bug reports detailing failures of real-world > applications then the decision would be easy. > I realise it could be hard to pin strange application behaviour down > to TSC issues, but I can''t readily spot any bugs on the XenServer or > xen.org bug trackers that might even be candidates.Maybe Dan can give more reasons. I think the TSC skew issue should be thre key one. But IMO, the TSC skew issue maybe not that important. In real-world apps, rdtsc is mostly read by the kernel syscall gettimeofday, and kernel can eliminate the skew before returning to user app through returning the value "plus 1 on last_tsc". Even if apps use "raw rdtsc" instruction to get timestamp in rare cases, I think the issue also can be solved by apps'' ownself through a simple check. Therefore, I still can''t figure out the suasive reasons why adopts emulation way for the tsc read so far. Dan, Any other necessary reasons to introduce it except fixing skew issue between cpus? Xiantao _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-29 03:13 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> Any other necessary reasons to introduce it except fixing > skew issue between cpus?I think the reasons were discussed at length in a previous thread. To summarize, some applications already do (or can or will) use rdtsc as a legal non-privileged instruction and expect to get the Intel SDM-defined behavior of the instruction. There are some hardware+software environments that do not correctly provide this behavior, including some older SMP machines and early generation AMD desktop multi-core machines. There are many more that DO correctly provide this behavior, including all recent generation Intel machines and nearly all recent generation AMD machines... and, very importantly, any VM running on VMware. Because rdtsc is "unsafe" on some machines does not stop application programmers from using it on machines where it is safe; it will be increasingly likely that an application programmer may never experience an "unsafe" hardware/software environment. We cannot tell app programmers that they cannot use rdtsc, only warn them that it is risky for some older machines. Some will use it anyway and I have personally talked to some that are. We cannot predict or legislate how rdtsc will be used in the future. It is easy to imagine a scenario where a transaction-oriented application timestamps transactions with rdtsc and then, when an infrequent error condition occurs, tries to replay the transactions; this would work fine on any new hardware and on VMware and even on Xen for awhile... but without rdtsc-emulaion could mysteriously fail and cause data corruption, perhaps only after a migration (or two or three) and only when the infrequent error condition occurs after a few migrations. Would you want to be on the support team that tries to diagnose that? VMware does not have this problem. The cost for Xen is some performance. I do not take loss of performance lightly and have spent weeks now looking for a better solution. I have not found one and am open to any creative alternative. But pretending that apps, today and in the future will NOT use rdtsc, or that they will use it only following Xen-prescribed constraints, is just wishful thinking and not appropriate for hardware/software vendors selling to enterprise customers (or selling to cloud providers that expect to provide services for enterprise customers). The patch provided records and reports frequency of rdtsc emulations. If an administrator cares to improve performance and can verify that all apps that are (now or ever will be) running on this VM are using rdtsc safely, a per-domain option can be specified to get the performance back. The opposite is nearly impossible to ascertain. So I believe, for rdtsc, correctness is more important than a small amount of performance. Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-29 16:44 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/28/09 16:36, Dan Magenheimer wrote:>> Well, it shouldn''t be enabled by default. That slows down all rdtsc >> operations for the benefit of very niche applications. The Xen >> clocksource assumes that rdtsc is fast, unemulated and in need of >> correction. >> >> If someone really needs an artificial tsc, then they can enable the >> option for themselves. >> > While I understand and sympathize (and the idealistic side of me even > agrees with) your position, >Please, no. I think you''ve managed to completely misunderstand everything I''ve said on this subject. The TSC is not, and has never been reliable. At best it has periods of reliability in which its possible to get coherent results, but never for indefinite periods. That includes the recent architecture updates, though they have considerably increased the periods of reliability. This is not an idealistic statement; it is the conclusion from years of attempts to use the tsc as a time source. It does not matter what the documents say, there''s always a creative new way for hardware to break the tsc''s behaviour. There is no requirement for Xen to emulate a hardware feature better than native. In particular, there''s no need for Xen to synthesize the platonic ideal of a TSC when existing hardware platform does not do so. That said, I have no problem with making such emulation an option if it really helps real programs in the field. I wouldn''t even mind having it on by default... Except that it comes with a terrible cost. rdtsc is a very heavily used instruction, because its part of the Xen clock ABI. It gets executed many thousands of times a second, including at least once every context switch. Adding the overhead of an extra synchronous emulated instruction trap into Xen is going to have a very noticeable performance hit. This is a massive regression in everyday, every-system, every-user performance to satisfy the hypothetical requirement of abstract apps which may or may not exist. The fact that you haven''t named a single real app that breaks in the current system and then works with a synthetic TSC further undermines your argument. Are you really arguing on the basis that "some apps might use tsc in a fragile way" or do you actually have a specific list of apps that are really suffering? What are they? How do they break? How many users do they have? What are they doing with the tsc? Why can''t they use some other mechanism? I think a change of this magnitude needs a lot more justification than some handwavy arguments from (dubious) first principles. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-29 17:24 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/28/09 20:13, Dan Magenheimer wrote:> So I believe, for rdtsc, correctness is more important > than a small amount of performance. >So run the apps in an HVM domain, where emulating everything is appropriate, and leave PV domains alone. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-29 17:34 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Hi Jeremy -- I think it''s fair for us both to completely air our opinions, but since you''ve contradicted some of my "hand-wavy" points and since much of your position is based on the contradictions, I feel it''s necessary to respond.> The TSC is not, and has never been reliable.Your data is stale. Please discuss this with processor and system vendors (I have) and look at the latest upstream Linux. I agree that this presumed reliability should be carefully tested. (See my proposed patch posted yesterday.) If you are correct and I am not, your position (that "all apps that use rdtsc are fundamentally broken") is much stronger.> Except that it comes with a terrible cost... > This is a massive regression...It is certainly significant but "terrible" and "massive" are a bit strong. Based on my measurements, the examples you cite will degrade performance by a fraction of a percent. And this loss can be eliminated IF the administrator understands the risks and chooses performance over correctness by specifying an option at guest creation.> The fact that you haven''t named a single real app... > Are you really arguing on the basis that "some apps > might use tsc in a fragile way" or do you actually have a > specific listI have a (small) specific list. For various reasons, I cannot go into further detail. But if TSC is indeed reliable on newer processors/systems (see above) AND on VMware, there will certainly be more. Thanks, Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-29 19:01 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/29/09 10:34, Dan Magenheimer wrote:>> The TSC is not, and has never been reliable. >> > Your data is stale. Please discuss this with processor > and system vendors (I have)I''m sure they would say that, as they frequently have in the past. And then it breaks again. Even then their guarantee only applies while the processor is powered up and hasn''t been reset. But resets can occur while the system is "running" in the form of S3 suspend events, or even completely powered off when suspending to disk. Besides, the SDM makes no claims about tsc synchronization between CPUs, only that on a given CPU/core is at a constant rate (at least from now on, promise!). At that point you''re relying on motherboard/system design, which has a lot more scope for brokenness than just core CPUs. Large systems simply don''t keep all their CPUs in the same clock domain, and certainly won''t guarantee that for all future system designs.> and look at the latest upstream Linux. >The kernel does what it needs to do to make the tsc usable for itself. It does not make (and has never made) any guarantees about how the tsc appears in usermode (except for the purposes of implementing vgettimeofday). You won''t find many Linux kernel developers who are sympathetic with the idea of making any hard guarantees for bare usermode tsc use.>> Except that it comes with a terrible cost... >> This is a massive regression... >> > It is certainly significant but "terrible" and "massive" > are a bit strong. Based on my measurements, the examples > you cite will degrade performance by a fraction of a percent. >How have you measured this? On what systems? Your patch introduces this regression on all systems for everyone; it isn''t enough to measure it on a new Nehalem machine.>> The fact that you haven''t named a single real app... >> Are you really arguing on the basis that "some apps >> might use tsc in a fragile way" or do you actually have a >> specific list >> > I have a (small) specific list. For various reasons, > I cannot go into further detail. >Well, that goes back to my point about spending a lot of effort on something that can only possibility benefit a (small) set of niche apps. Spending the effort on a vsyscall approach would be fast, correct and widely beneficial. You can default it on within Oracle, or even in Oracle''s Xen distro. It''s unreasonable to make this a global default when you''re trying to solve a local problem. You haven''t established this is something that anyone else need be concerned about. Besides, if they want a global sequence number, why not just keep a global counter? That''s going to be much cheaper and more reliable than anything time-based. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Sep-29 23:45 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
Let me attempt to summarize our disagreement and then I''d like to stop arguing. 1) You think rdtsc is never safe for an app to use. I think it is safe in many hardware/software enviroments and that the number of safe environments will continue to increase. 2) You think the performance hit from rdtsc-emulation is horrible. I think it is significant but relatively small and acceptable and, if there are cases where it is not, administrators or virtual appliance providers can make an informed choice to turn it off. 3) You think app developers can and should be told to not use rdtsc because it is inherently unsafe and so Xen doesn''t need to ever be concerned with making it safe. I think app developers will do what they please, ignorant to the subtleties of rdtsc, and if their app works on their hardware and on VMware but not on Xen, they will blame Xen or Linux or their OS provider or their cloud provider, and probably never know that their app doesn''t work because of rdtsc. Do you agree that those are the key points of disagreement? Thanks, Dan> -----Original Message----- > From: Jeremy Fitzhardinge [mailto:jeremy@goop.org] > Sent: Tuesday, September 29, 2009 1:02 PM > To: Dan Magenheimer > Cc: Xen-Devel (E-mail); Keir Fraser > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > On 09/29/09 10:34, Dan Magenheimer wrote: > >> The TSC is not, and has never been reliable. > >> > > Your data is stale. Please discuss this with processor > > and system vendors (I have) > > I''m sure they would say that, as they frequently have in the > past. And > then it breaks again. > > Even then their guarantee only applies while the processor is > powered up > and hasn''t been reset. But resets can occur while the system is > "running" in the form of S3 suspend events, or even completely powered > off when suspending to disk. > > Besides, the SDM makes no claims about tsc synchronization > between CPUs, > only that on a given CPU/core is at a constant rate (at least from now > on, promise!). At that point you''re relying on motherboard/system > design, which has a lot more scope for brokenness than just > core CPUs. > Large systems simply don''t keep all their CPUs in the same > clock domain, > and certainly won''t guarantee that for all future system designs. > > > and look at the latest upstream Linux. > > > The kernel does what it needs to do to make the tsc usable > for itself. > It does not make (and has never made) any guarantees about how the tsc > appears in usermode (except for the purposes of implementing > vgettimeofday). You won''t find many Linux kernel developers who are > sympathetic with the idea of making any hard guarantees for bare > usermode tsc use. > > >> Except that it comes with a terrible cost... > >> This is a massive regression... > >> > > It is certainly significant but "terrible" and "massive" > > are a bit strong. Based on my measurements, the examples > > you cite will degrade performance by a fraction of a percent. > > > How have you measured this? On what systems? Your patch introduces > this regression on all systems for everyone; it isn''t enough > to measure > it on a new Nehalem machine. > > >> The fact that you haven''t named a single real app... > >> Are you really arguing on the basis that "some apps > >> might use tsc in a fragile way" or do you actually have a > >> specific list > >> > > I have a (small) specific list. For various reasons, > > I cannot go into further detail. > > > > Well, that goes back to my point about spending a lot of effort on > something that can only possibility benefit a (small) set of niche > apps. Spending the effort on a vsyscall approach would be > fast, correct > and widely beneficial. > > You can default it on within Oracle, or even in Oracle''s Xen distro. > It''s unreasonable to make this a global default when you''re trying to > solve a local problem. You haven''t established this is something that > anyone else need be concerned about. > > Besides, if they want a global sequence number, why not just keep a > global counter? That''s going to be much cheaper and more > reliable than > anything time-based. > > J >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Sep-30 00:41 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/29/09 16:45, Dan Magenheimer wrote:> Let me attempt to summarize our disagreement and then > I''d like to stop arguing. > > 1) You think rdtsc is never safe for an app to use. I > think it is safe in many hardware/software enviroments > and that the number of safe environments will continue > to increase. >I think its highly unadvisable, and hard to do correctly. I think there are better ways to achieve the same effect. I think you are flat-out mistaken in your faith in "safe environments"; such safe environments are certain machines qualified on a case-by-case basis, and can never be assumed to be generally available baseline configuration. You have not supported your faith in "safe environments" with any concrete documentation.> 2) You think the performance hit from rdtsc-emulation > is horrible. I think it is significant but relatively > small and acceptable and, if there are cases where it > is not, administrators or virtual appliance providers > can make an informed choice to turn it off. >When evaluating any performance hit one has to take into account who benefits versus who suffers. The pool of applications and users who could possibly benefit from synthetic tsc is very small, whereas every single user will suffer some performance degradation resulting from synthesizing the tsc.> 3) You think app developers can and should be told to > not use rdtsc because it is inherently unsafe and >Application developers have been consistently told and advised not to use rdtsc in usermode in all Linux environments; Linux under Xen is no exception. Nobody has ever made any guarantees that rdtsc has any particular behaviour in usermode, nor will they. The only guarantee that Linux makes is that it will use the tsc in vgettimeofday where possible.> so Xen doesn''t need to ever be concerned with making it > safe.I don''t have any problem with allowing synthesis as an option, or even making it the default for hvm guests. But I object to interfering with an important PV ABI, for two reasons: 1. it has a negative impact on all the kernel uses of rdtsc, which are safe 2. it prevents a high-performance vsyscall implementation to allow safe, fast usermode access to the tsc In other words, it makes things worse for everyone who is doing things the right way, for the sake of mitigating the risks of unsafe code.> I think app developers will do what they > please, ignorant to the subtleties of rdtsc, and if > their app works on their hardware and on VMware but > not on Xen, they will blame Xen or Linux or their > OS provider or their cloud provider, and probably never > know that their app doesn''t work because of rdtsc. >Shrug. They can blame who they like. It is no different from developers who write buggy multithreaded code that appears to work fine until they move to more cores, or a different system topology, or different libraries or compiler or OS or... It is not recommended for Linux. Microsoft does not recommended it for Windows. Developers are consistently advised not to use rdtsc in usermode. There are no standard library functions to access it directly, so they must go out of their way to do so. Pretending that the TSC is safe or advisable to use in usermode is just going to lead developers astray and enable bad habits. When a drug addict blames the rest of the world for their affliction, the answer is not to give them more drugs. That said, just as one can use Xen''s various mechanisms to approximate a machine layout in order to work around bugs in MT programs, I don''t have any particular problems with tsc-synthesis to work around buggy programs. But it should not be on by default. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Oct-05 23:15 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 09/27/09 12:22, Dan Magenheimer wrote:> Switches rdtsc emulation from boot option to per-domain > and enables it by default. Also removes hvm tsc scaling > as it is no longer necessary. > >I''ve run into a few problems from this patch: 1. I''m seeing occasional messages on the console "hrtimer: interrupt too slow, forcing clock min delta to 9001953 ns" which indicates that the kernel is noticing that timer operations are taking too long. 2. A domain can''t turn on and off its own tsc emulation state. I''m working on vsyscall support for pvclock (done, aside from this issue), so I need native tsc in usermode (or at least, one with the same parameters in kernel and userspace). I was getting very confused because I didn''t expect emulation to *only* apply to usermode; I was expecting it to be done uniformly to both user and kernel tscs, with appropriate adjustments to the vcpu_time_info values. 3. The ''s'' debug key never seems to count any usermode rdtsc instructions, even if I write a little program to explicitly exercise them. (Fix below) All in 64-bit PV domains. Thanks, J diff -r 1dc86d83b352 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Fri Oct 02 17:01:26 2009 -0700 +++ b/xen/arch/x86/time.c Mon Oct 05 16:13:15 2009 -0700 @@ -1476,7 +1476,7 @@ } else { - v->domain->arch.vtsc_kerncount++; + v->domain->arch.vtsc_usercount++; spin_lock(&v->domain->arch.vtsc_lock); now = get_s_time() + v->domain->arch.vtsc_stime_offset; if ( (int64_t)(now - v->domain->arch.vtsc_last) > 0 ) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Oct-05 23:42 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> I''ve run into a few problems from this patch: > > 1. I''m seeing occasional messages on the console "hrtimer: > interrupt > too slow, forcing clock min delta to 9001953 ns" which indicates > that the kernel is noticing that timer operations are > taking too long.Hmmm... that seems unlikely since it is highly probable that on ANY machine, emulated TSC is faster than other highres timers, for example, HPET. Perhaps it is a side effect of your assumptions described in (2) below?> 2. A domain can''t turn on and off its own tsc emulation state. I''m > working on vsyscall support for pvclock (done, aside from this > issue), so I need native tsc in usermode (or at least, one with > the same parameters in kernel and userspace). I was > getting very > confused because I didn''t expect emulation to *only* apply to > usermode; I was expecting it to be done uniformly to > both user and > kernel tscs, with appropriate adjustments to the vcpu_time_info > values.Sorry, I should probably have said this explicitly in the patch prologue, but there is no (easy/fast) way to turn on emulation for userland and turn off emulation for the kernel, so rather than requiring a change to the pvclock ABI, rdtsc emulation returns a raw TSC value when the rdtsc was executed in kernel mode and Xen system time (nsec) when the rdtsc was executed in userland. To ensure both correctness and maximum performance across a wide range of conditions, WITHOUT destroying backward compatiblity for the pvclock ABI, a decision tree similar to the one I just posted for apps could be employed. http://xen.markmail.org/message/uj4twbcsdw57z5zp But this might rely on adding the same new Xen features described in the parent to that post. OTOH, if pvclock is executed sometimes in userland and sometimes in kernel (e.g. depending on the setting of a sysfs variable), it seems like some kind of decision tree is required anyway.> 3. The ''s'' debug key never seems to count any usermode rdtsc > instructions, even if I write a little program to explicitly > exercise them. (Fix below)Good catch. Sorry, I tested that many times with an earlier version of the patch, but didn''t try debug-key with the final version, after I went through and did some (apparently careless) renaming. Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Oct-06 00:20 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 10/05/09 16:42, Dan Magenheimer wrote:>> I''ve run into a few problems from this patch: >> >> 1. I''m seeing occasional messages on the console "hrtimer: >> interrupt >> too slow, forcing clock min delta to 9001953 ns" which indicates >> that the kernel is noticing that timer operations are >> taking too long. >> > Hmmm... that seems unlikely since it is highly probable that on > ANY machine, emulated TSC is faster than other highres timers, > for example, HPET. Perhaps it is a side effect of your assumptions > described in (2) below? >The kernel is measuring all time using pvclock, so it is using rdtsc for that too. Its simply noticing that rdtsc-to-rdtsc time is taking longer than expected. (This is a domU, so it has no access to any other form of time.)>> 2. A domain can''t turn on and off its own tsc emulation state. I''m >> working on vsyscall support for pvclock (done, aside from this >> issue), so I need native tsc in usermode (or at least, one with >> the same parameters in kernel and userspace). I was >> getting very >> confused because I didn''t expect emulation to *only* apply to >> usermode; I was expecting it to be done uniformly to >> both user and >> kernel tscs, with appropriate adjustments to the vcpu_time_info >> values. >> > Sorry, I should probably have said this explicitly in the patch > prologue, but there is no (easy/fast) way to turn on emulation > for userland and turn off emulation for the kernel, so rather > than requiring a change to the pvclock ABI, rdtsc emulation > returns a raw TSC value when the rdtsc was executed in kernel > mode and Xen system time (nsec) when the rdtsc was executed > in userland. >You misunderstand me. I was expecting that it would treat the tsc the same in user and kernel mode, and return a set of appropriate pvclock parameters. This would allow user and kernel tscs to be consistent. That wouldn''t requite any changes to the pvclock ABI and it would retain the TSC''s "global timestamp" property, even across kernel/usermode boundary. Having them different is very awkard for tools which do things like measure kernel->user latency by getting tsc timestamps, or as I''m trying to do, use the pvclock parameters in userspace. You''ve effectively added an entirely new "usermode tsc" vs "kernel mode tsc" concept to the architecture. But aside from that, all I''m asking for is a way for a domain to explicitly request that its tsc not be synthesized (or failing that, something that looks exactly like an unsynthesized tsc), so that usermode pvclock can work without needing edits to the config file. The current situation is very difficult because the kernel can''t even tell if usermode is getting the same tsc properties that it is.> To ensure both correctness and maximum performance across > a wide range of conditions, WITHOUT destroying backward > compatiblity for the pvclock ABI, a decision tree similar > to the one I just posted for apps could be employed. >Well, having a variance between usermode and kernel tscs does break backwards compatibility and is very surprising.> http://xen.markmail.org/message/uj4twbcsdw57z5zp >That looks very complicated.> But this might rely on adding the same new Xen features > described in the parent to that post. > > OTOH, if pvclock is executed sometimes in userland and > sometimes in kernel (e.g. depending on the setting of > a sysfs variable), it seems like some kind of decision > tree is required anyway. >It is run in usermode as part of the normal vsyscall mechanism; the kernel also uses the same information and tsc to compue time for itself. At the moment the only test it makes is to check if Xen supports the new hypercall I added to support this. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Oct-06 01:43 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> But aside from that, all I''m asking for is a way for a domain to > explicitly request that its tsc not be synthesized (or failing that, > something that looks exactly like an unsynthesized tsc), so that > usermode pvclock can work without needing edits to the config file.Maybe I''m misunderstanding but I think what you are asking for is that the kernel override a setting specified by the administrator. When that setting (regardless of what you choose as a default) is tsc_native==0, that means the administrator has decided that correctness is more important than performance, that now or sometime in the future an app will use rdtsc and expect it to behave like it does on either hardware with a reliable TSC or like it does on VMware. So in that circumstance, if pvclock+vsyscall is dependent on an rdtsc, any rdtsc is slow and the pvclock+vsyscall using rdtsc is slow. I''m certainly open to a solution that solves both problems, I just don''t see one. Perhaps a better choice would be for emulated tsc to return Xen system time in both kernel and user mode (which is what it does for HVM domains) and, when a domain has tsc_native==0, Xen sets its pvclock parameters so that no scaling occurs? This results in the guest reporting that it has a 1GHz clock, but may be more consistent.> -----Original Message----- > From: Jeremy Fitzhardinge [mailto:jeremy@goop.org] > Sent: Monday, October 05, 2009 6:21 PM > To: Dan Magenheimer > Cc: Xen-Devel (E-mail); Keir Fraser > Subject: Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen > boot option with per-domain (hypervisor part) > > > On 10/05/09 16:42, Dan Magenheimer wrote: > >> I''ve run into a few problems from this patch: > >> > >> 1. I''m seeing occasional messages on the console "hrtimer: > >> interrupt > >> too slow, forcing clock min delta to 9001953 ns" > which indicates > >> that the kernel is noticing that timer operations are > >> taking too long. > >> > > Hmmm... that seems unlikely since it is highly probable that on > > ANY machine, emulated TSC is faster than other highres timers, > > for example, HPET. Perhaps it is a side effect of your assumptions > > described in (2) below? > > > > The kernel is measuring all time using pvclock, so it is > using rdtsc for > that too. Its simply noticing that rdtsc-to-rdtsc time is > taking longer > than expected. (This is a domU, so it has no access to any other form > of time.) > > >> 2. A domain can''t turn on and off its own tsc emulation > state. I''m > >> working on vsyscall support for pvclock (done, aside > from this > >> issue), so I need native tsc in usermode (or at > least, one with > >> the same parameters in kernel and userspace). I was > >> getting very > >> confused because I didn''t expect emulation to *only* apply to > >> usermode; I was expecting it to be done uniformly to > >> both user and > >> kernel tscs, with appropriate adjustments to the > vcpu_time_info > >> values. > >> > > Sorry, I should probably have said this explicitly in the patch > > prologue, but there is no (easy/fast) way to turn on emulation > > for userland and turn off emulation for the kernel, so rather > > than requiring a change to the pvclock ABI, rdtsc emulation > > returns a raw TSC value when the rdtsc was executed in kernel > > mode and Xen system time (nsec) when the rdtsc was executed > > in userland. > > > > You misunderstand me. I was expecting that it would treat the tsc the > same in user and kernel mode, and return a set of appropriate pvclock > parameters. This would allow user and kernel tscs to be consistent. > That wouldn''t requite any changes to the pvclock ABI and it > would retain > the TSC''s "global timestamp" property, even across > kernel/usermode boundary. > > Having them different is very awkard for tools which do things like > measure kernel->user latency by getting tsc timestamps, or as > I''m trying > to do, use the pvclock parameters in userspace. You''ve effectively > added an entirely new "usermode tsc" vs "kernel mode tsc" > concept to the > architecture. > > But aside from that, all I''m asking for is a way for a domain to > explicitly request that its tsc not be synthesized (or failing that, > something that looks exactly like an unsynthesized tsc), so that > usermode pvclock can work without needing edits to the config file. > > The current situation is very difficult because the kernel can''t even > tell if usermode is getting the same tsc properties that it is. > > > To ensure both correctness and maximum performance across > > a wide range of conditions, WITHOUT destroying backward > > compatiblity for the pvclock ABI, a decision tree similar > > to the one I just posted for apps could be employed. > > > Well, having a variance between usermode and kernel tscs does break > backwards compatibility and is very surprising. > > > http://xen.markmail.org/message/uj4twbcsdw57z5zp > > > > That looks very complicated. > > > But this might rely on adding the same new Xen features > > described in the parent to that post. > > > > OTOH, if pvclock is executed sometimes in userland and > > sometimes in kernel (e.g. depending on the setting of > > a sysfs variable), it seems like some kind of decision > > tree is required anyway. > > > > It is run in usermode as part of the normal vsyscall mechanism; the > kernel also uses the same information and tsc to compue time for > itself. At the moment the only test it makes is to check if Xen > supports the new hypercall I added to support this. > > J >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2009-Oct-06 04:56 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 10/05/09 18:43, Dan Magenheimer wrote:> Perhaps a better choice would be for emulated tsc > to return Xen system time in both kernel and user > mode (which is what it does for HVM domains) and, > when a domain has tsc_native==0, > Xen sets its pvclock parameters so that no scaling > occurs? This results in the guest reporting that > it has a 1GHz clock, but may be more consistent. >Yes, as I said:> But aside from that, all I''m asking for is a way for a domain to > explicitly request that its tsc not be synthesized (or failing that, > something that looks exactly like an unsynthesized tsc), so that > usermode pvclock can work without needing edits to the config file. >Having a consistent tsc between usermode and kernel will solve the significant functional breakage that currently exists with tsc synthesis enabled, making it just a question of performance (and resolution). I still think tsc_native=1 is the correct setting for the vast majority of PV guest domains, and enabling it by default is a bad idea. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Oct-06 07:07 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 06/10/2009 02:43, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> Perhaps a better choice would be for emulated tsc > to return Xen system time in both kernel and user > mode (which is what it does for HVM domains) and, > when a domain has tsc_native==0, > Xen sets its pvclock parameters so that no scaling > occurs? This results in the guest reporting that > it has a 1GHz clock, but may be more consistent.Yes, this has to be fixed. There''s no good reason for different TSC behaviours between kernel and userspace when both are emulated. If nothing else, when Jeremy''s patches go in (which I am sure they will, as the concept is sane and the implementation appears so too) then that is going to be incompatible with emulated tsc as it behaves currently. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Oct-06 09:09 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 06/10/2009 08:07, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:>> Perhaps a better choice would be for emulated tsc >> to return Xen system time in both kernel and user >> mode (which is what it does for HVM domains) and, >> when a domain has tsc_native==0, >> Xen sets its pvclock parameters so that no scaling >> occurs? This results in the guest reporting that >> it has a 1GHz clock, but may be more consistent. > > Yes, this has to be fixed. There''s no good reason for different TSC > behaviours between kernel and userspace when both are emulated. If nothing > else, when Jeremy''s patches go in (which I am sure they will, as the concept > is sane and the implementation appears so too) then that is going to be > incompatible with emulated tsc as it behaves currently.Should be fixed by c/s 20271. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Oct-06 13:49 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> >> Perhaps a better choice would be for emulated tsc > >> to return Xen system time in both kernel and user > >> mode (which is what it does for HVM domains) and, > >> when a domain has tsc_native==0, > >> Xen sets its pvclock parameters so that no scaling > >> occurs? This results in the guest reporting that > >> it has a 1GHz clock, but may be more consistent. > > > > Yes, this has to be fixed. There''s no good reason for different TSC > > behaviours between kernel and userspace when both are > emulated. If nothing > > else, when Jeremy''s patches go in (which I am sure they > will, as the concept > > is sane and the implementation appears so too) then that is > going to be > > incompatible with emulated tsc as it behaves currently. > > Should be fixed by c/s 20271.Excellent. Thanks. But in reviewing this patch, I noticed that you removed the vtsc_stime_offset field in struct arch_domain. This was intended to record the offset across migration. But a per-domain stime offset must already be recorded somewhere else, or hvm fully-emulated platform timers would be broken across migration. Do pv guests need something similar or is it magically handled someplace that I couldn''t quickly find? Thanks, Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Oct-06 13:59 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 06/10/2009 14:49, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> This was intended to record the offset across migration. > But a per-domain stime offset must already be recorded > somewhere else, or hvm fully-emulated platform timers > would be broken across migration. > > Do pv guests need something similar or is it magically > handled someplace that I couldn''t quickly find?PV guests understand that there will be a system-time discontinuity across save/restore. All I did was remove an unused field. You can reintro it when you use it. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Oct-08 23:35 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com] > > On 06/10/2009 14:49, "Dan Magenheimer" > <dan.magenheimer@oracle.com> wrote: > > > This was intended to record the offset across migration. > > But a per-domain stime offset must already be recorded > > somewhere else, or hvm fully-emulated platform timers > > would be broken across migration. > > > > Do pv guests need something similar or is it magically > > handled someplace that I couldn''t quickly find? > > PV guests understand that there will be a system-time > discontinuity across > save/restore. All I did was remove an unused field. You can > reintro it when > you use it.Interestingly, I find that there is a discontinuity for HVM guests as well (with either tsc_native=0 or tsc_native=1). This can be demonstrated easily by running a rdtsc loop in a user program that fails if time goes backwards or jumps forward too far, then doing a save/restore. I had assumed that the VT-x tsc_offset mechanism would handle the discontinuity but apparently it was never implemented. (I didn''t test live migration, so maybe the mechanism IS used if time goes backwards, but I suspect not.) Since TSC is used for inter-jiffie interpolation on many Linux systems (and the system I ran the test on reports "Using 3.5xxx MHz WALL PM GTOD PIT/TSC timer"), I''m surprised a huge discontinuity doesn''t cause at least some interesting problem for some HVM OS''s. Oops, it does. I see a soft lockup reported when I save/restore an HVM domain (tested with tsc_native=1, e.g. no tsc emulation). Looks like another one for the "fix the tsc" list... Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2009-Nov-05 17:08 UTC
RE: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
> On 06/10/2009 14:49, "Dan Magenheimer" > <dan.magenheimer@oracle.com> wrote: > > > This was intended to record the offset across migration. > > But a per-domain stime offset must already be recorded > > somewhere else, or hvm fully-emulated platform timers > > would be broken across migration. > > > > Do pv guests need something similar or is it magically > > handled someplace that I couldn''t quickly find? > > PV guests understand that there will be a system-time > discontinuity across > save/restore. All I did was remove an unused field. You can > reintro it when > you use it.Finally got back to looking at this... For an HVM, the relevant time values are saved/restored as part of the cpu state. For a PV, I''m thinking about adding them to the domctl_getdomaininfo struct rather than add additional domctls for each. Is this the proper place or will changes to this struct result in backward compatibility problems or ?? Thanks, Dan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Nov-06 07:01 UTC
Re: [Xen-devel] [PATCH] replace rdtsc emulation-vs-native xen boot option with per-domain (hypervisor part)
On 05/11/2009 17:08, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:> For a PV, I''m thinking about adding them to the > domctl_getdomaininfo struct rather than add additional > domctls for each. Is this the proper place or will > changes to this struct result in backward compatibility > problems or ??It would be fine as we provide no compatibility guarantees for any domctl or sysctl across consecutive major Xen releases. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel