Stefano Stabellini
2010-Feb-09 18:12 UTC
[Xen-devel] [PATCH] [PVOPS] dom0 sync xen wallclock
Hi Jeremy, this patch removes clock_was_set and adds an atomic notification chain instead so that not only hrtimers but other parts of the kernel can be notified of a time change as well. In fact xen/time.c needs to be notified so that can update xen wallclock time to keep it in sync; this is necessary otherwise other PV guests will get a wrong wallclock time at boot. Please let me know if my approach is suitable for upstream, I am willing to do any change needed and to send the patch upstream myself if you give me some directions. Thanks, Stefano Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0b56fd4..a0532ec 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -13,6 +13,7 @@ #include <linux/clockchips.h> #include <linux/kernel_stat.h> #include <linux/math64.h> +#include <linux/notifier.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -526,6 +527,43 @@ static int __init xen_setup_vsyscall_timeinfo(int cpu) } #endif /* CONFIG_PARAVIRT_CLOCK_VSYSCALL */ +static void sync_xen_wallclock(unsigned long dummy); +static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); +static void sync_xen_wallclock(unsigned long dummy) +{ + struct xen_platform_op op; + struct timespec ts; + + write_seqlock_irq(&xtime_lock); + + set_normalized_timespec(&ts, xtime.tv_sec, xtime.tv_nsec); + + op.cmd = XENPF_settime; + op.u.settime.secs = ts.tv_sec; + op.u.settime.nsecs = ts.tv_nsec; + op.u.settime.system_time = xen_clocksource_read(); + WARN_ON(HYPERVISOR_dom0_op(&op)); + + write_sequnlock_irq(&xtime_lock); + + /* Once per minute. */ + mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ); +} + +int xen_update_persistent_clock(struct notifier_block *this, + unsigned long event, void *ptr) +{ + if (!xen_initial_domain()) + return -1; + mod_timer(&sync_xen_wallclock_timer, jiffies + 1); + return 0; +} + +static struct notifier_block xen_clock_was_set = { + .notifier_call = xen_update_persistent_clock, +}; + + __init void xen_time_init(void) { int cpu = smp_processor_id(); @@ -549,6 +587,8 @@ __init void xen_time_init(void) xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + if (xen_initial_domain()) + atomic_notifier_chain_register(&clockset_notifier_list, &xen_clock_was_set); #ifdef CONFIG_PARAVIRT_CLOCK_VSYSCALL if (xen_setup_vsyscall_timeinfo(cpu) == 0) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 5d42d55..422e8ef 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -130,7 +130,7 @@ out_resume: dpm_resume_end(PMSG_RESUME); /* Make sure timer events get retriggered on all CPUs */ - clock_was_set(); + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); out_thaw: #ifdef CONFIG_PREEMPT diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4759917..c46a4e5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -247,7 +247,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; -extern void clock_was_set(void); extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); @@ -282,12 +281,6 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -/* - * clock_was_set() is a NOP for non- high-resolution systems. The - * time-sorted order guarantees that a timer does not expire early and - * is expired in the next softirq when the clock was advanced. - */ -static inline void clock_was_set(void) { } static inline void hrtimer_peek_ahead_timers(void) { } static inline void hres_timers_resume(void) { } diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a..5bb7e4a 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -131,6 +131,7 @@ static inline u32 arch_gettimeoffset(void) { return 0; } extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); +extern struct atomic_notifier_head clockset_notifier_list; #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) extern long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags); struct itimerval; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 49da79a..c170b65 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -665,7 +665,8 @@ static void retrigger_next_event(void *arg) * resolution timer interrupts. On UP we just disable interrupts and * call the high resolution interrupt code. */ -void clock_was_set(void) +static int clock_was_set(struct notifier_block *this, unsigned long event, + void *ptr) { /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); @@ -772,7 +773,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } - +static inline int clock_was_set(struct notifier_block *this, unsigned long event, + void *ptr) +{ + return 0; +} #endif /* CONFIG_HIGH_RES_TIMERS */ #ifdef CONFIG_TIMER_STATS @@ -1720,11 +1725,16 @@ static struct notifier_block __cpuinitdata hrtimers_nb = { .notifier_call = hrtimer_cpu_notify, }; +static struct notifier_block hrt_clock_was_set = { + .notifier_call = clock_was_set, +}; + void __init hrtimers_init(void) { hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); + atomic_notifier_chain_register(&clockset_notifier_list, &hrt_clock_was_set); #ifdef CONFIG_HIGH_RES_TIMERS open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); #endif diff --git a/kernel/time.c b/kernel/time.c index 2951194..667b959 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -138,7 +138,7 @@ static inline void warp_clock(void) xtime.tv_sec += sys_tz.tz_minuteswest * 60; update_xtime_cache(0); write_sequnlock_irq(&xtime_lock); - clock_was_set(); + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); } /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e8c77d9..284539f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,7 +18,11 @@ #include <linux/jiffies.h> #include <linux/time.h> #include <linux/tick.h> +#include <linux/notifier.h> +ATOMIC_NOTIFIER_HEAD(clockset_notifier_list); + +EXPORT_SYMBOL(clockset_notifier_list); /* * This read-write spinlock protects us from races in SMP while @@ -174,8 +178,7 @@ int do_settimeofday(struct timespec *tv) write_sequnlock_irqrestore(&xtime_lock, flags); - /* signal hrtimers about time change */ - clock_was_set(); + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); return 0; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Feb-09 18:20 UTC
Re: [Xen-devel] [PATCH] [PVOPS] dom0 sync xen wallclock
On Tue, 9 Feb 2010, Stefano Stabellini wrote:> Hi Jeremy, > this patch removes clock_was_set and adds an atomic notification chain > instead so that not only hrtimers but other parts of the kernel can be > notified of a time change as well. > In fact xen/time.c needs to be notified so that can update xen > wallclock time to keep it in sync; this is necessary otherwise other > PV guests will get a wrong wallclock time at boot. > Please let me know if my approach is suitable for upstream, I am willing > to do any change needed and to send the patch upstream myself if you > give me some directions.actually I just noticed that I missed few others clock_was_set calls used in other architectures (cris, blackfin, ...), I''ll resend another patch soon. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Feb-10 23:09 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 02/09/2010 10:12 AM, Stefano Stabellini wrote:> Hi Jeremy, > this patch removes clock_was_set and adds an atomic notification chain > instead so that not only hrtimers but other parts of the kernel can be > notified of a time change as well. > In fact xen/time.c needs to be notified so that can update xen > wallclock time to keep it in sync; this is necessary otherwise other > PV guests will get a wrong wallclock time at boot. > Please let me know if my approach is suitable for upstream, I am willing > to do any change needed and to send the patch upstream myself if you > give me some directions. >I''m not sure this is the right thing to do. We have a set_wallclock pvop, which Xen currently implements as a no-op, but it should do the appropriate hypercall to set Xen''s time if privileged enough. Conceptually the Xen persistent time is the same as the platform CMOS clock, so I don''t think we should update it any differently. Your patch may make sense, but it should also address the native case. At the moment it happens via sync_cmos_clock(), which is called periodically (I think) independently of whether the clock has actually been changed. There is one big difference between the Xen clock and the CMOS clock, which is that the Xen clock is being concurrently accessed by other domains. If it is being updated periodically, then there will be discontinuities in time which may affect other domains. But since there''s no time-warp ABI to Xen, I don''t think this can really be avoided; anyone reading periodically the Xen clock needs to be able to deal with any discontinuities. pvops kernels only inspect it at boot time, and so won''t see any subsequent time adjustments anyway. Thanks, J> Thanks, > > Stefano > > Signed-off-by: Stefano Stabellini<stefano.stabellini@eu.citrix.com> > > --- > > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c > index 0b56fd4..a0532ec 100644 > --- a/arch/x86/xen/time.c > +++ b/arch/x86/xen/time.c > @@ -13,6 +13,7 @@ > #include<linux/clockchips.h> > #include<linux/kernel_stat.h> > #include<linux/math64.h> > +#include<linux/notifier.h> > > #include<asm/pvclock.h> > #include<asm/xen/hypervisor.h> > @@ -526,6 +527,43 @@ static int __init xen_setup_vsyscall_timeinfo(int cpu) > } > #endif /* CONFIG_PARAVIRT_CLOCK_VSYSCALL */ > > +static void sync_xen_wallclock(unsigned long dummy); > +static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0); > +static void sync_xen_wallclock(unsigned long dummy) > +{ > + struct xen_platform_op op; > + struct timespec ts; > + > + write_seqlock_irq(&xtime_lock); > + > + set_normalized_timespec(&ts, xtime.tv_sec, xtime.tv_nsec); > + > + op.cmd = XENPF_settime; > + op.u.settime.secs = ts.tv_sec; > + op.u.settime.nsecs = ts.tv_nsec; > + op.u.settime.system_time = xen_clocksource_read(); > + WARN_ON(HYPERVISOR_dom0_op(&op)); > + > + write_sequnlock_irq(&xtime_lock); > + > + /* Once per minute. */ > + mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ); > +} > + > +int xen_update_persistent_clock(struct notifier_block *this, > + unsigned long event, void *ptr) > +{ > + if (!xen_initial_domain()) > + return -1; > + mod_timer(&sync_xen_wallclock_timer, jiffies + 1); > + return 0; > +} > + > +static struct notifier_block xen_clock_was_set = { > + .notifier_call = xen_update_persistent_clock, > +}; > + > + > __init void xen_time_init(void) > { > int cpu = smp_processor_id(); > @@ -549,6 +587,8 @@ __init void xen_time_init(void) > xen_setup_runstate_info(cpu); > xen_setup_timer(cpu); > xen_setup_cpu_clockevents(); > + if (xen_initial_domain()) > + atomic_notifier_chain_register(&clockset_notifier_list,&xen_clock_was_set); > > #ifdef CONFIG_PARAVIRT_CLOCK_VSYSCALL > if (xen_setup_vsyscall_timeinfo(cpu) == 0) > diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c > index 5d42d55..422e8ef 100644 > --- a/drivers/xen/manage.c > +++ b/drivers/xen/manage.c > @@ -130,7 +130,7 @@ out_resume: > dpm_resume_end(PMSG_RESUME); > > /* Make sure timer events get retriggered on all CPUs */ > - clock_was_set(); > + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); > > out_thaw: > #ifdef CONFIG_PREEMPT > diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h > index 4759917..c46a4e5 100644 > --- a/include/linux/hrtimer.h > +++ b/include/linux/hrtimer.h > @@ -247,7 +247,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) > #ifdef CONFIG_HIGH_RES_TIMERS > struct clock_event_device; > > -extern void clock_was_set(void); > extern void hres_timers_resume(void); > extern void hrtimer_interrupt(struct clock_event_device *dev); > > @@ -282,12 +281,6 @@ extern void hrtimer_peek_ahead_timers(void); > # define MONOTONIC_RES_NSEC LOW_RES_NSEC > # define KTIME_MONOTONIC_RES KTIME_LOW_RES > > -/* > - * clock_was_set() is a NOP for non- high-resolution systems. The > - * time-sorted order guarantees that a timer does not expire early and > - * is expired in the next softirq when the clock was advanced. > - */ > -static inline void clock_was_set(void) { } > static inline void hrtimer_peek_ahead_timers(void) { } > > static inline void hres_timers_resume(void) { } > diff --git a/include/linux/time.h b/include/linux/time.h > index ea16c1a..5bb7e4a 100644 > --- a/include/linux/time.h > +++ b/include/linux/time.h > @@ -131,6 +131,7 @@ static inline u32 arch_gettimeoffset(void) { return 0; } > extern void do_gettimeofday(struct timeval *tv); > extern int do_settimeofday(struct timespec *tv); > extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); > +extern struct atomic_notifier_head clockset_notifier_list; > #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) > extern long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags); > struct itimerval; > diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c > index 49da79a..c170b65 100644 > --- a/kernel/hrtimer.c > +++ b/kernel/hrtimer.c > @@ -665,7 +665,8 @@ static void retrigger_next_event(void *arg) > * resolution timer interrupts. On UP we just disable interrupts and > * call the high resolution interrupt code. > */ > -void clock_was_set(void) > +static int clock_was_set(struct notifier_block *this, unsigned long event, > + void *ptr) > { > /* Retrigger the CPU local events everywhere */ > on_each_cpu(retrigger_next_event, NULL, 1); > @@ -772,7 +773,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, > } > static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } > static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } > - > +static inline int clock_was_set(struct notifier_block *this, unsigned long event, > + void *ptr) > +{ > + return 0; > +} > #endif /* CONFIG_HIGH_RES_TIMERS */ > > #ifdef CONFIG_TIMER_STATS > @@ -1720,11 +1725,16 @@ static struct notifier_block __cpuinitdata hrtimers_nb = { > .notifier_call = hrtimer_cpu_notify, > }; > > +static struct notifier_block hrt_clock_was_set = { > + .notifier_call = clock_was_set, > +}; > + > void __init hrtimers_init(void) > { > hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, > (void *)(long)smp_processor_id()); > register_cpu_notifier(&hrtimers_nb); > + atomic_notifier_chain_register(&clockset_notifier_list,&hrt_clock_was_set); > #ifdef CONFIG_HIGH_RES_TIMERS > open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); > #endif > diff --git a/kernel/time.c b/kernel/time.c > index 2951194..667b959 100644 > --- a/kernel/time.c > +++ b/kernel/time.c > @@ -138,7 +138,7 @@ static inline void warp_clock(void) > xtime.tv_sec += sys_tz.tz_minuteswest * 60; > update_xtime_cache(0); > write_sequnlock_irq(&xtime_lock); > - clock_was_set(); > + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); > } > > /* > diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c > index e8c77d9..284539f 100644 > --- a/kernel/time/timekeeping.c > +++ b/kernel/time/timekeeping.c > @@ -18,7 +18,11 @@ > #include<linux/jiffies.h> > #include<linux/time.h> > #include<linux/tick.h> > +#include<linux/notifier.h> > > +ATOMIC_NOTIFIER_HEAD(clockset_notifier_list); > + > +EXPORT_SYMBOL(clockset_notifier_list); > > /* > * This read-write spinlock protects us from races in SMP while > @@ -174,8 +178,7 @@ int do_settimeofday(struct timespec *tv) > > write_sequnlock_irqrestore(&xtime_lock, flags); > > - /* signal hrtimers about time change */ > - clock_was_set(); > + atomic_notifier_call_chain(&clockset_notifier_list, 0, NULL); > > return 0; > } > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2010-Feb-11 00:06 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 10/02/2010 23:09, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:> There is one big difference between the Xen clock and the CMOS clock, > which is that the Xen clock is being concurrently accessed by other > domains. If it is being updated periodically, then there will be > discontinuities in time which may affect other domains. But since > there''s no time-warp ABI to Xen, I don''t think this can really be > avoided; anyone reading periodically the Xen clock needs to be able to > deal with any discontinuities. pvops kernels only inspect it at boot > time, and so won''t see any subsequent time adjustments anyway.A Xen ABI for adjtime()-alike warping of Xen system time is something we should add at some point, as that''s the logical thing for ntp in dom0 to affect, keeping the system''s ''nanosecond ticker'' in sync with the wider world. That''d keep all domU clocks in sync, all except for fixed wallclock offsets. But it doesn''t currently exist, so instead it gets bunged in with Xen''s existing settimeofday()-alike interface: hence domUs either can see discontinuities, or don''t get the automatic benefits of ntp sync at all, because they only read the wallclock offset at boot time. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Feb-11 00:47 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 02/10/2010 04:06 PM, Keir Fraser wrote:> A Xen ABI for adjtime()-alike warping of Xen system time is something we > should add at some point, as that''s the logical thing for ntp in dom0 to > affect, keeping the system''s ''nanosecond ticker'' in sync with the wider > world.Hm. I think it would be best to keep the ticker unchanged, but perhaps offer a mechanism so that dom0 can set separate offset and drift parameters which other domains can query to may system time to global time. But I don''t think even that is a very compelling idea.> That''d keep all domU clocks in sync, all except for fixed wallclock > offsets. But it doesn''t currently exist, so instead it gets bunged in with > Xen''s existing settimeofday()-alike interface: hence domUs either can see > discontinuities, or don''t get the automatic benefits of ntp sync at all, > because they only read the wallclock offset at boot time. >I suppose. All the domains can already run ntp if you really want to keep them in sync with each other and/or dom0 and/or the outside world. I''m not sure there''s much point in making Xen ABI changes in this area; anything we do will come down to being a partial and probably inadequate reimplementation of ntp (esp when you consider how to deal with migration). J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2010-Feb-11 08:03 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 11/02/2010 00:47, "Jeremy Fitzhardinge" <jeremy@goop.org> wrote:> On 02/10/2010 04:06 PM, Keir Fraser wrote: >> A Xen ABI for adjtime()-alike warping of Xen system time is something we >> should add at some point, as that''s the logical thing for ntp in dom0 to >> affect, keeping the system''s ''nanosecond ticker'' in sync with the wider >> world. > > Hm. I think it would be best to keep the ticker unchanged, but perhaps > offer a mechanism so that dom0 can set separate offset and drift > parameters which other domains can query to may system time to global > time. But I don''t think even that is a very compelling idea.Hm, well your suggestion is certainly less work. :-D -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Feb-11 11:24 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote:> I''m not sure this is the right thing to do. We have a set_wallclock > pvop, which Xen currently implements as a no-op, but it should do the > appropriate hypercall to set Xen''s time if privileged enough. > > Conceptually the Xen persistent time is the same as the platform CMOS > clock, so I don''t think we should update it any differently. Your patch > may make sense, but it should also address the native case. At the > moment it happens via sync_cmos_clock(), which is called periodically (I > think) independently of whether the clock has actually been changed. > > There is one big difference between the Xen clock and the CMOS clock, > which is that the Xen clock is being concurrently accessed by other > domains. If it is being updated periodically, then there will be > discontinuities in time which may affect other domains. But since > there''s no time-warp ABI to Xen, I don''t think this can really be > avoided; anyone reading periodically the Xen clock needs to be able to > deal with any discontinuities. pvops kernels only inspect it at boot > time, and so won''t see any subsequent time adjustments anyway. >Linux 2.6.18 does consider xen persistent time as the platform CMOS clock, but I don''t think this is what we actually want: if we run ntpd in dom0 we probably want to sync xen time with dom0 time more often than linux usually update the CMOS clock. In particular we want that as soon as ntpd in dom0 set the right time, it gets propagated in xen so that all the PV guests created after that moment can read the right wallclock at boot. I think that the right approach to achieve that is to break the assumption that xen persistent time is like the CMOS and treat it more like xtime instead. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2010-Feb-11 11:36 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On Thu, 2010-02-11 at 11:24 +0000, Stefano Stabellini wrote:> On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote: > > I''m not sure this is the right thing to do. We have a set_wallclock > > pvop, which Xen currently implements as a no-op, but it should do the > > appropriate hypercall to set Xen''s time if privileged enough. > > > > Conceptually the Xen persistent time is the same as the platform CMOS > > clock, so I don''t think we should update it any differently. Your patch > > may make sense, but it should also address the native case. At the > > moment it happens via sync_cmos_clock(), which is called periodically (I > > think) independently of whether the clock has actually been changed. > > > > There is one big difference between the Xen clock and the CMOS clock, > > which is that the Xen clock is being concurrently accessed by other > > domains. If it is being updated periodically, then there will be > > discontinuities in time which may affect other domains. But since > > there''s no time-warp ABI to Xen, I don''t think this can really be > > avoided; anyone reading periodically the Xen clock needs to be able to > > deal with any discontinuities. pvops kernels only inspect it at boot > > time, and so won''t see any subsequent time adjustments anyway. > > > > Linux 2.6.18 does consider xen persistent time as the platform > CMOS clock, but I don''t think this is what we actually want: if we run > ntpd in dom0 we probably want to sync xen time with dom0 time more often > than linux usually update the CMOS clock. > In particular we want that as soon as ntpd in dom0 set the right time, > it gets propagated in xen so that all the PV guests created after that > moment can read the right wallclock at boot. > I think that the right approach to achieve that is to break the > assumption that xen persistent time is like the CMOS and treat it more > like xtime instead.I agree, guests which have a dependent wallclock are reading this variable from the hypervisor as their xtime, not their CMOS RTC. Having domain 0 write it as if it were the CMOS seems asymmetric with this approach. Any explicit setting of the time in domain 0 should be immediately propagated through to the hypervisor, with all the usual caveats that calling settimeofday has on any normal Linux system, but it appears it is necessary for the same reasons we usually allow ntpd (or ntpdate) to step the time once at start of day. Without this we have to wait some time after boot for the time setting to be propagated, this delay is often long enough to have started several VMs which will now be running with the wrong time and/or see even worse discontinuities when the time is eventually set. As for propagating variations derived from the ntp drift calculations a drift based interface to the hypervisor would be an improvement but I think what Stefano proposes is basically inline with historical domain 0 behaviour. Guests which have a dependent wallclock already handle any discontinuities by adding their own monotonicity checks. One tricky issue with pushing drift parameters into the hypervisor is that presumably ntp would need to calculate them based on the hypervisor''s idea of time, calculating based on domain 0''s idea of time when the two are allowed to drift seems like it would simply lead to using the wrong drift paramters and making things worse for the hypervisor. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Feb-11 19:16 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 02/11/2010 12:03 AM, Keir Fraser wrote:>> Hm. I think it would be best to keep the ticker unchanged, but perhaps >> offer a mechanism so that dom0 can set separate offset and drift >> parameters which other domains can query to may system time to global >> time. But I don''t think even that is a very compelling idea. >> > Hm, well your suggestion is certainly less work. :-D >Yep, always a plus. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Feb-18 23:43 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 02/11/2010 03:24 AM, Stefano Stabellini wrote:> On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote: > >> I''m not sure this is the right thing to do. We have a set_wallclock >> pvop, which Xen currently implements as a no-op, but it should do the >> appropriate hypercall to set Xen''s time if privileged enough. >> >> Conceptually the Xen persistent time is the same as the platform CMOS >> clock, so I don''t think we should update it any differently. Your patch >> may make sense, but it should also address the native case. At the >> moment it happens via sync_cmos_clock(), which is called periodically (I >> think) independently of whether the clock has actually been changed. >> >> There is one big difference between the Xen clock and the CMOS clock, >> which is that the Xen clock is being concurrently accessed by other >> domains. If it is being updated periodically, then there will be >> discontinuities in time which may affect other domains. But since >> there''s no time-warp ABI to Xen, I don''t think this can really be >> avoided; anyone reading periodically the Xen clock needs to be able to >> deal with any discontinuities. pvops kernels only inspect it at boot >> time, and so won''t see any subsequent time adjustments anyway. >> >> > Linux 2.6.18 does consider xen persistent time as the platform > CMOS clock, but I don''t think this is what we actually want: if we run > ntpd in dom0 we probably want to sync xen time with dom0 time more often > than linux usually update the CMOS clock. > In particular we want that as soon as ntpd in dom0 set the right time, > it gets propagated in xen so that all the PV guests created after that > moment can read the right wallclock at boot. >If you''re making the assumption that a guest will query the Xen wallclock time precisely once, then I guess that will work OK. But if you expect them to query it more than once, then you need to add a notion of drift correction to Xen''s system clock so that you can warp it in a continuous fashion. But that complicates things because dom0 itself is using the Xen system clock as its own timebase which it is also applying drift correction to. So to make that work you either need to introduce distinct notions of corrected and uncorrected Xen clocks and make domains query them appropriately, or more deeply PV time in the domains so they directly use/adjust the Xen drift factor rather than maintaining their own.> I think that the right approach to achieve that is to break the > assumption that xen persistent time is like the CMOS and treat it more > like xtime instead. >I don''t really see why wallclock time is any of Xen''s business. It seems like something that could be completely dropped with no loss of functionality. It''s somewhat useful for domains to be able to fetch an approximately correct wallclock time early in boot, but aside from that its fairly useless. I would prefer to see: * dom0 gets time of day from CMOS or whatever hardware mechanism it wants * PV domUs get a rough time of day at boot either from a simple "seconds since 1970" in the startup info put there by the domain builder from dom0''s clock (so there''s no illusion that it is anything other than a one-time start time for the domain) * HVM domains similarly get an emulated CMOS implemented in qemu * anyone who wants continuously tracked accurate time needs to run ntp and deprecate any notion of wallclock time from Xen. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2010-Feb-19 01:02 UTC
RE: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
> I don''t really see why wallclock time is any of Xen''s business.While I agree in principal, I ran across a case recently where it could/would be very useful: Recent Linux kernels running on very latest processors use the TSC as a clocksource. VMware can and does support this, even across save/restore/migration. Supporting it across migration requires the hypervisor to have some equivalent of wallclock time that can be synchronized across machines in a "migration pool". I *think* VMware can use NTP to do this (because their hypervisor is closer to a full OS than Xen is). Without this, Xen cannot allow an OS to select the TSC as a clocksource unless it disallows save/restore/migration. (Thus the "no_migrate" flag recently added in Xen.) The result is a small but not insignificant performance advantage for VMware on some benchmarks, which may appear in upcoming 2.6.32-based distros. Of course, it may also be possible to get this from dom0 but for performance reasons some concept of wallclock "offset" must reside in the hypervisor.> -----Original Message----- > From: Jeremy Fitzhardinge [mailto:jeremy@goop.org] > Sent: Thursday, February 18, 2010 4:44 PM > To: Stefano Stabellini > Cc: Ian Campbell; xen-devel@lists.xensource.com > Subject: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock > > On 02/11/2010 03:24 AM, Stefano Stabellini wrote: > > On Wed, 10 Feb 2010, Jeremy Fitzhardinge wrote: > > > >> I''m not sure this is the right thing to do. We have a set_wallclock > >> pvop, which Xen currently implements as a no-op, but it should do > the > >> appropriate hypercall to set Xen''s time if privileged enough. > >> > >> Conceptually the Xen persistent time is the same as the platform > CMOS > >> clock, so I don''t think we should update it any differently. Your > patch > >> may make sense, but it should also address the native case. At the > >> moment it happens via sync_cmos_clock(), which is called > periodically (I > >> think) independently of whether the clock has actually been changed. > >> > >> There is one big difference between the Xen clock and the CMOS > clock, > >> which is that the Xen clock is being concurrently accessed by other > >> domains. If it is being updated periodically, then there will be > >> discontinuities in time which may affect other domains. But since > >> there''s no time-warp ABI to Xen, I don''t think this can really be > >> avoided; anyone reading periodically the Xen clock needs to be able > to > >> deal with any discontinuities. pvops kernels only inspect it at > boot > >> time, and so won''t see any subsequent time adjustments anyway. > >> > >> > > Linux 2.6.18 does consider xen persistent time as the platform > > CMOS clock, but I don''t think this is what we actually want: if we > run > > ntpd in dom0 we probably want to sync xen time with dom0 time more > often > > than linux usually update the CMOS clock. > > In particular we want that as soon as ntpd in dom0 set the right > time, > > it gets propagated in xen so that all the PV guests created after > that > > moment can read the right wallclock at boot. > > > > If you''re making the assumption that a guest will query the Xen > wallclock time precisely once, then I guess that will work OK. But if > you expect them to query it more than once, then you need to add a > notion of drift correction to Xen''s system clock so that you can warp > it > in a continuous fashion. But that complicates things because dom0 > itself is using the Xen system clock as its own timebase which it is > also applying drift correction to. So to make that work you either > need > to introduce distinct notions of corrected and uncorrected Xen clocks > and make domains query them appropriately, or more deeply PV time in > the > domains so they directly use/adjust the Xen drift factor rather than > maintaining their own. > > > > I think that the right approach to achieve that is to break the > > assumption that xen persistent time is like the CMOS and treat it > more > > like xtime instead. > > > > I don''t really see why wallclock time is any of Xen''s business. It > seems like something that could be completely dropped with no loss of > functionality. It''s somewhat useful for domains to be able to fetch an > approximately correct wallclock time early in boot, but aside from that > its fairly useless. > > I would prefer to see: > > * dom0 gets time of day from CMOS or whatever hardware mechanism it > wants > * PV domUs get a rough time of day at boot either from a simple > "seconds since 1970" in the startup info put there by the domain > builder from dom0''s clock (so there''s no illusion that it is > anything other than a one-time start time for the domain) > * HVM domains similarly get an emulated CMOS implemented in qemu > * anyone who wants continuously tracked accurate time needs to run > ntp > > and deprecate any notion of wallclock time from Xen. > > J > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2010-Feb-19 09:10 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote:> * PV domUs get a rough time of day at boot either from a simple > "seconds since 1970" in the startup info put there by the domain > builder from dom0''s clock (so there''s no illusion that it is > anything other than a one-time start time for the domain)Unfortunately there is an existing large installed base of guests which use the dependent wallclock mode since it is the default in oldstyle Xen kernels.> * HVM domains similarly get an emulated CMOS implemented in qemuIsn''t this currently emulated within the hypervisor using the wallclock time within Xen? I assume that it was pulled into the h/v for a specific reason? I''m not sure what reason though, the emulated RTC doesn''t seem especially performance critical... Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2010-Feb-19 09:39 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 19/02/2010 09:10, "Ian Campbell" <Ian.Campbell@citrix.com> wrote:> On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote: >> * PV domUs get a rough time of day at boot either from a simple >> "seconds since 1970" in the startup info put there by the domain >> builder from dom0''s clock (so there''s no illusion that it is >> anything other than a one-time start time for the domain) > > Unfortunately there is an existing large installed base of guests which > use the dependent wallclock mode since it is the default in oldstyle Xen > kernels.That still plays okay with dom0 setting Xen wallclock only once, or not at all (since Xen primes its WC values from CMOS). The wallclock values would then never change and old kernels naturally just pick up WC at boot and never again. And their default mode is also compatible with them running ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs with Xen. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Feb-19 23:05 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On 02/19/2010 01:10 AM, Ian Campbell wrote:> Unfortunately there is an existing large installed base of guests which > use the dependent wallclock mode since it is the default in oldstyle Xen > kernels. >Sure. I''m not suggesting that we remove the existing hypercall. I just don''t think we should extend it or encourage its use.>> * HVM domains similarly get an emulated CMOS implemented in qemu >> > Isn''t this currently emulated within the hypervisor using the wallclock > time within Xen? I assume that it was pulled into the h/v for a specific > reason? I''m not sure what reason though, the emulated RTC doesn''t seem > especially performance critical... >It is? I wonder why we bothered adding a pvop for it then... J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Feb-23 12:13 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
on fri, 19 feb 2010, keir fraser wrote:> on 19/02/2010 09:10, "ian campbell" <ian.campbell@citrix.com> wrote: > > > on thu, 2010-02-18 at 23:43 +0000, jeremy fitzhardinge wrote: > >> * pv domus get a rough time of day at boot either from a simple > >> "seconds since 1970" in the startup info put there by the domain > >> builder from dom0''s clock (so there''s no illusion that it is > >> anything other than a one-time start time for the domain) > > > > unfortunately there is an existing large installed base of guests which > > use the dependent wallclock mode since it is the default in oldstyle xen > > kernels. > > that still plays okay with dom0 setting xen wallclock only once, or not at > all (since xen primes its wc values from cmos). the wallclock values would > then never change and old kernels naturally just pick up wc at boot and > never again. and their default mode is also compatible with them running > ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs > with xen. >Even if we decide that dom0 should sync the wallclock only once with xen, ideally right after ntp sync''s, we still need to hook into clock_was_set and therefore do something very similar to what my patch does (apart from the timer to keep updating the xen wallclock) because otherwise if we just use the pvop set_wallclock it might take several minutes before having a single wallclock update. The reason for this is that the wallclock update policy of the kernel is very lax and I don''t think it is suitable for our use case; in particular the classic initial time sync done with ntpdate at boot time by most distros doesn''t propagate the wallclock to the hardware and\or xen. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2010-Feb-23 14:25 UTC
[Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On Fri, 2010-02-19 at 23:05 +0000, Jeremy Fitzhardinge wrote:> > > Unfortunately there is an existing large installed base of guests > which > > use the dependent wallclock mode since it is the default in oldstyle > Xen > > kernels. > > > > Sure. I''m not suggesting that we remove the existing hypercall. I > just don''t think we should extend it or encourage its use.We currently have a large installed base of guests which use dependent wallclock and therefore expect the Xen wallclock to be correct when they start and to be remain roughly in sync thereafter (and these guests cope with the sawtoothing this implies). I think the first half of that behaviour (correct when they start) is most crucial here. As it stands the Xen wallclock time doesn''t get updated for several minutes after boot because ntpdate doesn''t cause a sync to the hypervisor nor hardware RTC and ntpd takes ages to decide it has syncd which means that any PV guests which start in that interval and use dependent wallclock (which is currently most) will see a large time discontinuity whenever ntpd in domain 0 is happy enough to actually sync the RTC (which as a side-effect pushes a new time into Xen) not to mention they will be running with some bogus time for that entire interval. Hooking clock_was_set fixes this issue If we choose not to include the timer aspect which keeps the hypervisor wallclock roughly in sync we need to communicate this change to. They now need to run ntpd in their guest where previously we advised the opposite (for example in http://wiki.xensource.com/xenwiki/InstallationNotes but this also impacts distro documentation and google-fu). Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2010-Feb-23 14:25 UTC
Re: [Xen-devel] Re: [PATCH] [PVOPS] dom0 sync xen wallclock
On Fri, 2010-02-19 at 09:39 +0000, Keir Fraser wrote:> On 19/02/2010 09:10, "Ian Campbell" <Ian.Campbell@citrix.com> wrote: > > > On Thu, 2010-02-18 at 23:43 +0000, Jeremy Fitzhardinge wrote: > >> * PV domUs get a rough time of day at boot either from a simple > >> "seconds since 1970" in the startup info put there by the domain > >> builder from dom0''s clock (so there''s no illusion that it is > >> anything other than a one-time start time for the domain) > > > > Unfortunately there is an existing large installed base of guests which > > use the dependent wallclock mode since it is the default in oldstyle Xen > > kernels. > > That still plays okay with dom0 setting Xen wallclock only once, or not at > all (since Xen primes its WC values from CMOS).I think we should try and arrange to do it once so we cope more gracefully with wildly incorrect CMOS clocks.> The wallclock values would > then never change and old kernels naturally just pick up WC at boot and > never again. And their default mode is also compatible with them running > ntpd -- if the kernel detects ntp sync then it changes/reduces how it syncs > with Xen.But these guests won''t be running ntpd because they expect domain 0 to be doing it for them, via the dependent wallclock mode. I don''t disagree that we could transition to a better solution but we are currently not providing the host level behaviour that existing guests expect. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel