X86: Prefer TSC-deadline timer in Xen
The new TSC Deadline Timer offers system software a low overhead
per-logical-thread deadline timer in TSC units.
The timer is implemented via a new architectural 64-bit register,
IA32_TSC_DEADLINE_MSR. Reads and writes of this MSR occur in program order,
but are non-serializing.
The support for this feature is indicated by
CPUID.01H:ECX.TSC_Deadline[bit 24] = 1 as documented in the Intel Architectures
Software Developer''s Manual.
The LOCAL APIC on new processors has a mode where its underlying hardware timer
can now be accessed via the non-serializing IA32_TSC_DEADLINE_MSR in TSC tick
units.
If this mode is present, prefer it over the traditional LAPIC timer mode.
KERN_DEBUG dmesg will print "TSC deadline timer enabled" when TDT is
used.
Bootparam "tdt_off" is available to revert to LAPIC timer mode.
This patch is based on original work by Len Brown for Linux kernel.
cc: Len Brown <len.brown@intel.com>
Signed-off-by: Wei Gang <gang.wei@intel.com>
diff -r 00b92112b055 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Wed Oct 20 17:26:51 2010 +0100
+++ b/xen/arch/x86/apic.c	Fri Oct 29 19:24:56 2010 +0800
@@ -37,6 +37,15 @@
 #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */
 #include <mach_apic.h>
 #include <io_ports.h>
+
+#define APIC_TIMER_MODE_ONESHOT         (0 << 17)
+#define APIC_TIMER_MODE_PERIODIC        (1 << 17)
+#define APIC_TIMER_MODE_TSC_DEADLINE    (2 << 17)
+#define APIC_TIMER_MODE_MASK            (3 << 17)
+
+static int tdt_enabled;
+static int tdt_disable;
+boolean_param("tdt_off", tdt_disable);
 
 static struct {
     int active;
@@ -1198,6 +1207,13 @@ static void __setup_APIC_LVTT(unsigned i
     lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
     if (!APIC_INTEGRATED(ver))
         lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+    if (tdt_enabled)
+    {
+        lvtt_value &= (~APIC_TIMER_MODE_MASK);
+        lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE;
+    }
+
     apic_write_around(APIC_LVTT, lvtt_value);
 
     tmp_value = apic_read(APIC_TDCR);
@@ -1311,6 +1327,12 @@ void __init setup_boot_APIC_clock(void)
 
     calibrate_APIC_clock();
 
+    if (!tdt_disable && boot_cpu_has(X86_FEATURE_TSC_DEADLINE))
+    {
+        printk(KERN_DEBUG "TSC deadline timer enabled\n");
+        tdt_enabled = 1;
+    }
+
     setup_APIC_timer();
     
     local_irq_restore(flags);
@@ -1360,12 +1382,24 @@ int reprogram_timer(s_time_t timeout)
     if ( !cpu_has_apic )
         return 1;
 
-    if ( timeout && ((expire = timeout - NOW()) > 0) )
-        apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX);
-
-    apic_write(APIC_TMICT, (unsigned long)apic_tmict);
-
-    return apic_tmict || !timeout;
+    if ( tdt_enabled )
+    {
+        u64 tsc = 0;
+
+        if ( timeout )
+            tsc = stime2tsc(timeout);
+
+        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
+    }
+    else
+    {
+        if ( timeout && ((expire = timeout - NOW()) > 0) )
+            apic_tmict = min_t(u64, (bus_scale * expire) >> 18,
UINT_MAX);
+
+        apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+    }
+
+    return apic_tmict || !timeout || tdt_enabled;
 }
 
 fastcall void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
diff -r 00b92112b055 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Wed Oct 20 17:26:51 2010 +0100
+++ b/xen/arch/x86/time.c	Fri Oct 29 19:28:07 2010 +0800
@@ -662,26 +662,31 @@ static void __init init_platform_timer(v
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+u64 stime2tsc(s_time_t stime)
 {
     struct cpu_time *t;
     struct time_scale sys_to_tsc;
     s_time_t stime_delta;
-    u64 new_tsc;
-
+    u64 tsc;
+
+    t = &this_cpu(cpu_time);
+    sys_to_tsc = scale_reciprocal(t->tsc_scale);
+
+    stime_delta = stime - t->stime_local_stamp;
+    if ( stime_delta < 0 )
+        stime_delta = 0;
+
+    tsc = t->local_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc);
+
+    return tsc;
+}
+
+void cstate_restore_tsc(void)
+{
     if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
-    t = &this_cpu(cpu_time);
-    sys_to_tsc = scale_reciprocal(t->tsc_scale);
-
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
-    if ( stime_delta < 0 )
-        stime_delta = 0;
-
-    new_tsc = t->local_tsc_stamp + scale_delta(stime_delta,
&sys_to_tsc);
-
-    write_tsc(new_tsc);
+    write_tsc(stime2tsc(read_platform_stime()));
 }
 
 /***************************************************************************
diff -r 00b92112b055 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Wed Oct 20 17:26:51 2010 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Fri Oct 29 19:21:36 2010 +0800
@@ -99,6 +99,7 @@
 #define X86_FEATURE_SSE4_2	(4*32+20) /* Streaming SIMD Extensions 4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* Extended xAPIC */
 #define X86_FEATURE_POPCNT	(4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline
Timer */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* OSXSAVE */
 #define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running under some hypervisor */
diff -r 00b92112b055 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h	Wed Oct 20 17:26:51 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h	Fri Oct 29 19:21:36 2010 +0800
@@ -327,6 +327,8 @@
 #define MSR_IA32_MISC_ENABLE_MONITOR_ENABLE (1<<18)
 #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID  (1<<22)
 #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1<<23)
+
+#define MSR_IA32_TSC_DEADLINE		0x000006E0
 
 /* Intel Model 6 */
 #define MSR_P6_EVNTSEL0			0x00000186
diff -r 00b92112b055 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Wed Oct 20 17:26:51 2010 +0100
+++ b/xen/include/xen/time.h	Fri Oct 29 19:21:36 2010 +0800
@@ -31,6 +31,7 @@ typedef s64 s_time_t;
 typedef s64 s_time_t;
 
 s_time_t get_s_time(void);
+u64 stime2tsc(s_time_t stime);
 unsigned long get_localtime(struct domain *d);
 
 struct tm {
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jan Beulich
2010-Oct-28  07:46 UTC
Re: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
>>> On 28.10.10 at 07:45, "Wei, Gang" <gang.wei@intel.com> wrote: > --- a/xen/arch/x86/apic.c Wed Oct 20 17:26:51 2010 +0100 > +++ b/xen/arch/x86/apic.c Fri Oct 29 19:24:56 2010 +0800 > @@ -37,6 +37,15 @@ > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ > #include <mach_apic.h> > #include <io_ports.h> > + > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) > +#define APIC_TIMER_MODE_MASK (3 << 17) > + > +static int tdt_enabled; > +static int tdt_disable; > +boolean_param("tdt_off", tdt_disable);It would be more natural to call the parameter just "tdt", and use a non-zero initialized variable that gets set to zero when the user passes "tdt=off" (or another of the boolean false indicators). Perhaps you could even get away with just the single "tdt_enabled" variable then.> @@ -1360,12 +1382,24 @@ int reprogram_timer(s_time_t timeout) > if ( !cpu_has_apic ) > return 1; > > - if ( timeout && ((expire = timeout - NOW()) > 0) ) > - apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > - > - apic_write(APIC_TMICT, (unsigned long)apic_tmict); > - > - return apic_tmict || !timeout; > + if ( tdt_enabled ) > + { > + u64 tsc = 0;Is zero really a proper "no-timeout" indicator here?> + > + if ( timeout ) > + tsc = stime2tsc(timeout); > + > + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); > + } > + else > + { > + if ( timeout && ((expire = timeout - NOW()) > 0) ) > + apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > + > + apic_write(APIC_TMICT, (unsigned long)apic_tmict); > + } > + > + return apic_tmict || !timeout || tdt_enabled;How can this always be successful if tdt_enabled? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  09:48 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
> From: Jan Beulich [mailto:JBeulich@novell.com], Thursday, October 28, 2010 3:46 PM > >>> On 28.10.10 at 07:45, "Wei, Gang" <gang.wei@intel.com> wrote: > > --- a/xen/arch/x86/apic.c Wed Oct 20 17:26:51 2010 +0100 > > +++ b/xen/arch/x86/apic.c Fri Oct 29 19:24:56 2010 +0800 > > @@ -37,6 +37,15 @@ > > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ > > #include <mach_apic.h> > > #include <io_ports.h> > > + > > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) > > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) > > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) > > +#define APIC_TIMER_MODE_MASK (3 << 17) > > + > > +static int tdt_enabled; > > +static int tdt_disable; > > +boolean_param("tdt_off", tdt_disable); > > It would be more natural to call the parameter just "tdt", and > use a non-zero initialized variable that gets set to zero when > the user passes "tdt=off" (or another of the boolean false > indicators). Perhaps you could even get away with just the > single "tdt_enabled" variable then.Rename the parameter should be ok. But I prefer to keep two variable there to avoid check both tdt_enabled & boot_cpu_has(X86_FEATURE_TSC_DEADLINE) everywhere. I would like to accept user input "tdt=off", and replace tdt_disable with tdt_enable.> > @@ -1360,12 +1382,24 @@ int reprogram_timer(s_time_t timeout) > > if ( !cpu_has_apic ) > > return 1; > > > > - if ( timeout && ((expire = timeout - NOW()) > 0) ) > > - apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > > - > > - apic_write(APIC_TMICT, (unsigned long)apic_tmict); > > - > > - return apic_tmict || !timeout; > > + if ( tdt_enabled ) > > + { > > + u64 tsc = 0; > > Is zero really a proper "no-timeout" indicator here?Yes, it is. Writing zero to MSR_IA32_TSC_DEADLINE will disarm the tdt according to SDM.> > + > > + if ( timeout ) > > + tsc = stime2tsc(timeout); > > + > > + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); > > + } > > + else > > + { > > + if ( timeout && ((expire = timeout - NOW()) > 0) ) > > + apic_tmict = min_t(u64, (bus_scale * expire) >> 18, > UINT_MAX); > > + > > + apic_write(APIC_TMICT, (unsigned long)apic_tmict); > > + } > > + > > + return apic_tmict || !timeout || tdt_enabled; > > How can this always be successful if tdt_enabled?If tdt_enabled, there are only three cases: 1st, timeout=0, then write 0 to tdt msr to stop timer, return successful; 2nd, timeout <= NOW(), a tsc value less than or equal current tsc will be written to tdt msr, then a expiring interrupt will be generated right now, return successful; 3rd, timeout > NOW(), a tsc value > current tsc will be written to tdt msr, also return successful. No need to return failed if tdt_enabled. Jimmy _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2010-Oct-28  11:42 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
>>> On 28.10.10 at 11:48, "Wei, Gang" <gang.wei@intel.com> wrote: >> From: Jan Beulich [mailto:JBeulich@novell.com], Thursday, October 28, 2010 > 3:46 PM >> >>> On 28.10.10 at 07:45, "Wei, Gang" <gang.wei@intel.com> wrote: >> > --- a/xen/arch/x86/apic.c Wed Oct 20 17:26:51 2010 +0100 >> > +++ b/xen/arch/x86/apic.c Fri Oct 29 19:24:56 2010 +0800 >> > @@ -37,6 +37,15 @@ >> > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ >> > #include <mach_apic.h> >> > #include <io_ports.h> >> > + >> > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) >> > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) >> > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) >> > +#define APIC_TIMER_MODE_MASK (3 << 17) >> > + >> > +static int tdt_enabled; >> > +static int tdt_disable; >> > +boolean_param("tdt_off", tdt_disable); >> >> It would be more natural to call the parameter just "tdt", and >> use a non-zero initialized variable that gets set to zero when >> the user passes "tdt=off" (or another of the boolean false >> indicators). Perhaps you could even get away with just the >> single "tdt_enabled" variable then. > > Rename the parameter should be ok. But I prefer to keep two variable there > to avoid check both tdt_enabled & boot_cpu_has(X86_FEATURE_TSC_DEADLINE) > everywhere.Why? Just clear tdt_enabled when you find !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) during initialization. And btw., this (or if you really want to keep them separate, both) variable(s) are pretty reasonable candidates for __read_mostly.>> > @@ -1360,12 +1382,24 @@ int reprogram_timer(s_time_t timeout) >> > if ( !cpu_has_apic ) >> > return 1; >> > >> > - if ( timeout && ((expire = timeout - NOW()) > 0) ) >> > - apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); >> > - >> > - apic_write(APIC_TMICT, (unsigned long)apic_tmict); >> > - >> > - return apic_tmict || !timeout; >> > + if ( tdt_enabled ) >> > + { >> > + u64 tsc = 0; >> >> Is zero really a proper "no-timeout" indicator here? > > Yes, it is. Writing zero to MSR_IA32_TSC_DEADLINE will disarm the tdt > according to SDM.Okay, then (albeit unlikely) you should check that you don''t unintentionally write zero into the MSR.>> > + >> > + if ( timeout ) >> > + tsc = stime2tsc(timeout); >> > + >> > + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); >> > + } >> > + else >> > + { >> > + if ( timeout && ((expire = timeout - NOW()) > 0) ) >> > + apic_tmict = min_t(u64, (bus_scale * expire) >> 18, >> UINT_MAX); >> > + >> > + apic_write(APIC_TMICT, (unsigned long)apic_tmict); >> > + } >> > + >> > + return apic_tmict || !timeout || tdt_enabled; >> >> How can this always be successful if tdt_enabled? > > If tdt_enabled, there are only three cases: 1st, timeout=0, then write 0 to > tdt msr to stop timer, return successful; 2nd, timeout <= NOW(), a tsc value > less than or equal current tsc will be written to tdt msr, then a expiring > interrupt will be generated right now, return successful; 3rd, timeout > > NOW(), a tsc value > current tsc will be written to tdt msr, also return > successful. No need to return failed if tdt_enabled.Ah, okay - I should have fully read the doc first. Sorry for the noise. Now rather than extending the return expression, why don''t you "return 1" inside the if()''s body (and in that case you could leave the original code mostly unchanged since then you also don''t need an else). Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  13:54 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
> From: Jan Beulich [mailto:JBeulich@novell.com], October 28, 2010 7:43 PM > >>> On 28.10.10 at 11:48, "Wei, Gang" <gang.wei@intel.com> wrote: > >> From: Jan Beulich [mailto:JBeulich@novell.com], Thursday, October 28, > 2010 > > 3:46 PM > >> >>> On 28.10.10 at 07:45, "Wei, Gang" <gang.wei@intel.com> wrote: > >> > --- a/xen/arch/x86/apic.c Wed Oct 20 17:26:51 2010 +0100 > >> > +++ b/xen/arch/x86/apic.c Fri Oct 29 19:24:56 2010 +0800 > >> > @@ -37,6 +37,15 @@ > >> > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ > >> > #include <mach_apic.h> > >> > #include <io_ports.h> > >> > + > >> > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) > >> > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) > >> > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) > >> > +#define APIC_TIMER_MODE_MASK (3 << 17) > >> > + > >> > +static int tdt_enabled; > >> > +static int tdt_disable; > >> > +boolean_param("tdt_off", tdt_disable); > >> > >> It would be more natural to call the parameter just "tdt", and > >> use a non-zero initialized variable that gets set to zero when > >> the user passes "tdt=off" (or another of the boolean false > >> indicators). Perhaps you could even get away with just the > >> single "tdt_enabled" variable then. > > > > Rename the parameter should be ok. But I prefer to keep two variable there > > to avoid check both tdt_enabled & > boot_cpu_has(X86_FEATURE_TSC_DEADLINE) > > everywhere. > > Why? Just clear tdt_enabled when you find > !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) during initialization. > > And btw., this (or if you really want to keep them separate, both) > variable(s) are pretty reasonable candidates for __read_mostly.I still want to keep them because __setup_APIC_LVTT() will be called multiple times - the first call with tdt_enabled == false, and the following calls with tdt_enabled == true. I will add __read_mostly for the two variables.> > >> > @@ -1360,12 +1382,24 @@ int reprogram_timer(s_time_t timeout) > >> > if ( !cpu_has_apic ) > >> > return 1; > >> > > >> > - if ( timeout && ((expire = timeout - NOW()) > 0) ) > >> > - apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > >> > - > >> > - apic_write(APIC_TMICT, (unsigned long)apic_tmict); > >> > - > >> > - return apic_tmict || !timeout; > >> > + if ( tdt_enabled ) > >> > + { > >> > + u64 tsc = 0; > >> > >> Is zero really a proper "no-timeout" indicator here? > > > > Yes, it is. Writing zero to MSR_IA32_TSC_DEADLINE will disarm the tdt > > according to SDM. > > Okay, then (albeit unlikely) you should check that you don''t > unintentionally write zero into the MSR.I am sure this MSR will only be written with zero while timeout is 0.> > >> > + > >> > + if ( timeout ) > >> > + tsc = stime2tsc(timeout); > >> > + > >> > + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); > >> > + } > >> > + else > >> > + { > >> > + if ( timeout && ((expire = timeout - NOW()) > 0) ) > >> > + apic_tmict = min_t(u64, (bus_scale * expire) >> 18, > >> UINT_MAX); > >> > + > >> > + apic_write(APIC_TMICT, (unsigned long)apic_tmict); > >> > + } > >> > + > >> > + return apic_tmict || !timeout || tdt_enabled; > >> > >> How can this always be successful if tdt_enabled? > > > > If tdt_enabled, there are only three cases: 1st, timeout=0, then write 0 to > > tdt msr to stop timer, return successful; 2nd, timeout <= NOW(), a tsc value > > less than or equal current tsc will be written to tdt msr, then a expiring > > interrupt will be generated right now, return successful; 3rd, timeout > > > NOW(), a tsc value > current tsc will be written to tdt msr, also return > > successful. No need to return failed if tdt_enabled. > > Ah, okay - I should have fully read the doc first. Sorry for the > noise. Now rather than extending the return expression, why > don''t you "return 1" inside the if()''s body (and in that case > you could leave the original code mostly unchanged since then > you also don''t need an else).Yes, this is also a good comment. I will take it even it has the side effect of adding a return point in the middle of the function. Jimmy _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  14:17 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
Resend after merging Jan''s comments.
-Jimmy
---------------------------------------------------
X86: Prefer TSC-deadline timer in Xen
The new TSC Deadline Timer offers system software a low overhead
per-logical-thread deadline timer in TSC units.
The timer is implemented via a new architectural 64-bit register,
IA32_TSC_DEADLINE_MSR. Reads and writes of this MSR occur in program order,
but are non-serializing.
The support for this feature is indicated by
CPUID.01H:ECX.TSC_Deadline[bit 24] = 1 as documented in the Intel Architectures
Software Developer''s Manual.
The LOCAL APIC on new processors has a mode where its underlying hardware timer
can now be accessed via the non-serializing IA32_TSC_DEADLINE_MSR in TSC tick
units.
If this mode is present, prefer it over the traditional LAPIC timer mode.
KERN_DEBUG dmesg will print "TSC deadline timer enabled" when TDT is
used.
Bootparam "tdt=off" is available to revert to LAPIC timer mode.
This patch is based on original work by Len Brown for Linux kernel.
cc: Len Brown <len.brown@intel.com>
Signed-off-by: Wei Gang <gang.wei@intel.com>
diff -r 0dc0bc411035 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/arch/x86/apic.c	Sat Oct 30 04:03:03 2010 +0800
@@ -37,6 +37,15 @@
 #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */
 #include <mach_apic.h>
 #include <io_ports.h>
+
+#define APIC_TIMER_MODE_ONESHOT         (0 << 17)
+#define APIC_TIMER_MODE_PERIODIC        (1 << 17)
+#define APIC_TIMER_MODE_TSC_DEADLINE    (2 << 17)
+#define APIC_TIMER_MODE_MASK            (3 << 17)
+
+static int tdt_enabled __read_mostly;
+static int tdt_enable __read_mostly = 1;
+boolean_param("tdt", tdt_enable);
 
 static struct {
     int active;
@@ -1198,6 +1207,13 @@ static void __setup_APIC_LVTT(unsigned i
     lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
     if (!APIC_INTEGRATED(ver))
         lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+    if ( tdt_enabled )
+    {
+        lvtt_value &= (~APIC_TIMER_MODE_MASK);
+        lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE;
+    }
+
     apic_write_around(APIC_LVTT, lvtt_value);
 
     tmp_value = apic_read(APIC_TDCR);
@@ -1311,6 +1327,12 @@ void __init setup_boot_APIC_clock(void)
 
     calibrate_APIC_clock();
 
+    if ( tdt_enable && boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
+    {
+        printk(KERN_DEBUG "TSC deadline timer enabled\n");
+        tdt_enabled = 1;
+    }
+
     setup_APIC_timer();
     
     local_irq_restore(flags);
@@ -1360,10 +1382,24 @@ int reprogram_timer(s_time_t timeout)
     if ( !cpu_has_apic )
         return 1;
 
-    if ( timeout && ((expire = timeout - NOW()) > 0) )
-        apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX);
-
-    apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+    if ( tdt_enabled )
+    {
+        u64 tsc = 0;
+
+        if ( timeout )
+            tsc = stime2tsc(timeout);
+
+        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
+
+        return 1;
+    }
+    else
+    {
+        if ( timeout && ((expire = timeout - NOW()) > 0) )
+            apic_tmict = min_t(u64, (bus_scale * expire) >> 18,
UINT_MAX);
+
+        apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+    }
 
     return apic_tmict || !timeout;
 }
diff -r 0dc0bc411035 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/arch/x86/time.c	Fri Oct 29 19:30:33 2010 +0800
@@ -662,26 +662,31 @@ static void __init init_platform_timer(v
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+u64 stime2tsc(s_time_t stime)
 {
     struct cpu_time *t;
     struct time_scale sys_to_tsc;
     s_time_t stime_delta;
-    u64 new_tsc;
-
+    u64 tsc;
+
+    t = &this_cpu(cpu_time);
+    sys_to_tsc = scale_reciprocal(t->tsc_scale);
+
+    stime_delta = stime - t->stime_local_stamp;
+    if ( stime_delta < 0 )
+        stime_delta = 0;
+
+    tsc = t->local_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc);
+
+    return tsc;
+}
+
+void cstate_restore_tsc(void)
+{
     if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
-    t = &this_cpu(cpu_time);
-    sys_to_tsc = scale_reciprocal(t->tsc_scale);
-
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
-    if ( stime_delta < 0 )
-        stime_delta = 0;
-
-    new_tsc = t->local_tsc_stamp + scale_delta(stime_delta,
&sys_to_tsc);
-
-    write_tsc(new_tsc);
+    write_tsc(stime2tsc(read_platform_stime()));
 }
 
 /***************************************************************************
diff -r 0dc0bc411035 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Fri Oct 29 19:30:33 2010 +0800
@@ -99,6 +99,7 @@
 #define X86_FEATURE_SSE4_2	(4*32+20) /* Streaming SIMD Extensions 4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* Extended xAPIC */
 #define X86_FEATURE_POPCNT	(4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline
Timer */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* OSXSAVE */
 #define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running under some hypervisor */
diff -r 0dc0bc411035 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h	Fri Oct 29 19:30:33 2010 +0800
@@ -327,6 +327,8 @@
 #define MSR_IA32_MISC_ENABLE_MONITOR_ENABLE (1<<18)
 #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID  (1<<22)
 #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1<<23)
+
+#define MSR_IA32_TSC_DEADLINE		0x000006E0
 
 /* Intel Model 6 */
 #define MSR_P6_EVNTSEL0			0x00000186
diff -r 0dc0bc411035 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/xen/time.h	Fri Oct 29 19:30:33 2010 +0800
@@ -31,6 +31,7 @@ typedef s64 s_time_t;
 typedef s64 s_time_t;
 
 s_time_t get_s_time(void);
+u64 stime2tsc(s_time_t stime);
 unsigned long get_localtime(struct domain *d);
 
 struct tm {
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Tim Deegan
2010-Oct-28  14:22 UTC
Re: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
At 14:54 +0100 on 28 Oct (1288277689), Wei, Gang wrote:> > From: Jan Beulich [mailto:JBeulich@novell.com], October 28, 2010 7:43 PM > > >>> On 28.10.10 at 11:48, "Wei, Gang" <gang.wei@intel.com> wrote: > > >> From: Jan Beulich [mailto:JBeulich@novell.com], Thursday, October 28, > > 2010 > > > 3:46 PM > > >> >>> On 28.10.10 at 07:45, "Wei, Gang" <gang.wei@intel.com> wrote: > > >> > --- a/xen/arch/x86/apic.c Wed Oct 20 17:26:51 2010 +0100 > > >> > +++ b/xen/arch/x86/apic.c Fri Oct 29 19:24:56 2010 +0800 > > >> > @@ -37,6 +37,15 @@ > > >> > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ > > >> > #include <mach_apic.h> > > >> > #include <io_ports.h> > > >> > + > > >> > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) > > >> > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) > > >> > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) > > >> > +#define APIC_TIMER_MODE_MASK (3 << 17) > > >> > + > > >> > +static int tdt_enabled; > > >> > +static int tdt_disable; > > >> > +boolean_param("tdt_off", tdt_disable); > > >> > > >> It would be more natural to call the parameter just "tdt", and > > >> use a non-zero initialized variable that gets set to zero when > > >> the user passes "tdt=off" (or another of the boolean false > > >> indicators). Perhaps you could even get away with just the > > >> single "tdt_enabled" variable then. > > > > > > Rename the parameter should be ok. But I prefer to keep two variable there > > > to avoid check both tdt_enabled & > > boot_cpu_has(X86_FEATURE_TSC_DEADLINE) > > > everywhere. > > > > Why? Just clear tdt_enabled when you find > > !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) during initialization. > > > > And btw., this (or if you really want to keep them separate, both) > > variable(s) are pretty reasonable candidates for __read_mostly. > > I still want to keep them because __setup_APIC_LVTT() will be called > multiple times - the first call with tdt_enabled == false, and the > following calls with tdt_enabled == true.Is that important? If so, please add explanatory comments in the appropriate places, because it''s not obvious that it''s happening, or why. Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, XenServer Engineering Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2010-Oct-28  14:36 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
>>> On 28.10.10 at 16:17, "Wei, Gang" <gang.wei@intel.com> wrote: > +static int tdt_enabled __read_mostly; > +static int tdt_enable __read_mostly = 1;I wasn''t completely correct with my earlier statement regarding these: tdt_enable really should be __initdata, as it''s only referenced by an __init function.>- if ( timeout && ((expire = timeout - NOW()) > 0) ) >- apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); >- >- apic_write(APIC_TMICT, (unsigned long)apic_tmict); >+ if ( tdt_enabled ) >+ { >+ u64 tsc = 0; >+ >+ if ( timeout ) >+ tsc = stime2tsc(timeout); >+ >+ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); >+ >+ return 1; >+ } >+ else >+ { >+ if ( timeout && ((expire = timeout - NOW()) > 0) ) >+ apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); >+ >+ apic_write(APIC_TMICT, (unsigned long)apic_tmict); >+ } > > return apic_tmict || !timeout;So you still decided to keep the "else"? Without it the patch would be smaller and, at least to me, the function remain better readable... But that''s minor, or course, patch looks good to me regardless. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2010-Oct-28  14:42 UTC
Re: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
On 28/10/2010 15:22, "Tim Deegan" <Tim.Deegan@citrix.com> wrote:>> I still want to keep them because __setup_APIC_LVTT() will be called >> multiple times - the first call with tdt_enabled == false, and the >> following calls with tdt_enabled == true. > > Is that important? If so, please add explanatory comments in the > appropriate places, because it''s not obvious that it''s happening, or why.Indeed, I often simplify patches when I merge them, and an apparently redundant foo_enabled variable would be first to go. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  15:01 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
> Tim Deegan [mailto:Tim.Deegan@citrix.com], Thursday, October 28, 2010 10:23 PM...> > > >> > +static int tdt_enabled; > > > >> > +static int tdt_disable; > > > >> > +boolean_param("tdt_off", tdt_disable); > > > >> > > > >> It would be more natural to call the parameter just "tdt", and > > > >> use a non-zero initialized variable that gets set to zero when > > > >> the user passes "tdt=off" (or another of the boolean false > > > >> indicators). Perhaps you could even get away with just the > > > >> single "tdt_enabled" variable then. > > > > > > > > Rename the parameter should be ok. But I prefer to keep two variable > there > > > > to avoid check both tdt_enabled & > > > boot_cpu_has(X86_FEATURE_TSC_DEADLINE) > > > > everywhere. > > > > > > Why? Just clear tdt_enabled when you find > > > !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) during initialization. > > > > > > And btw., this (or if you really want to keep them separate, both) > > > variable(s) are pretty reasonable candidates for __read_mostly. > > > > I still want to keep them because __setup_APIC_LVTT() will be called > > multiple times - the first call with tdt_enabled == false, and the > > following calls with tdt_enabled == true. > > Is that important? If so, please add explanatory comments in the > appropriate places, because it''s not obvious that it''s happening, or why.Think it again, I should remove tdt_enable and just keep tdt_enabled, and skip calibrate_APIC_clock() while tdt_enabled & boot_cpu_has(X86_FEATURE_TSC_DEADLINE). Thus things become simpler. Jimmy _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  15:05 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
>Jan Beulich [mailto:JBeulich@novell.com], October 28, 2010 10:36 PM > >>> On 28.10.10 at 16:17, "Wei, Gang" <gang.wei@intel.com> wrote: > > +static int tdt_enabled __read_mostly; > > +static int tdt_enable __read_mostly = 1; > > I wasn''t completely correct with my earlier statement regarding these: > tdt_enable really should be __initdata, as it''s only referenced by an > __init function.I decided to remove tdt_enable and just keep tdt_enabled.> > >- if ( timeout && ((expire = timeout - NOW()) > 0) ) > >- apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > >- > >- apic_write(APIC_TMICT, (unsigned long)apic_tmict); > >+ if ( tdt_enabled ) > >+ { > >+ u64 tsc = 0; > >+ > >+ if ( timeout ) > >+ tsc = stime2tsc(timeout); > >+ > >+ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); > >+ > >+ return 1; > >+ } > >+ else > >+ { > >+ if ( timeout && ((expire = timeout - NOW()) > 0) ) > >+ apic_tmict = min_t(u64, (bus_scale * expire) >> 18, > UINT_MAX); > >+ > >+ apic_write(APIC_TMICT, (unsigned long)apic_tmict); > >+ } > > > > return apic_tmict || !timeout; > > So you still decided to keep the "else"? Without it the patch would be > smaller and, at least to me, the function remain better readable... > > But that''s minor, or course, patch looks good to me regardless.I will remove it. Jimmy _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-28  15:13 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
Resend.
Jimmy
---------------------------------------------------------
X86: Prefer TSC-deadline timer in Xen
The new TSC Deadline Timer offers system software a low overhead
per-logical-thread deadline timer in TSC units.
The timer is implemented via a new architectural 64-bit register,
IA32_TSC_DEADLINE_MSR. Reads and writes of this MSR occur in program order,
but are non-serializing.
The support for this feature is indicated by
CPUID.01H:ECX.TSC_Deadline[bit 24] = 1 as documented in the Intel Architectures
Software Developer''s Manual.
The LOCAL APIC on new processors has a mode where its underlying hardware timer
can now be accessed via the non-serializing IA32_TSC_DEADLINE_MSR in TSC tick
units.
If this mode is present, prefer it over the traditional LAPIC timer mode.
KERN_DEBUG dmesg will print "TSC deadline timer enabled" when TDT is
used.
Bootparam "tdt=off" is available to revert to LAPIC timer mode.
This patch is based on original work by Len Brown for Linux kernel.
cc: Len Brown <len.brown@intel.com>
Signed-off-by: Wei Gang <gang.wei@intel.com>
diff -r 0dc0bc411035 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/arch/x86/apic.c	Sat Oct 30 05:09:41 2010 +0800
@@ -37,6 +37,14 @@
 #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */
 #include <mach_apic.h>
 #include <io_ports.h>
+
+#define APIC_TIMER_MODE_ONESHOT         (0 << 17)
+#define APIC_TIMER_MODE_PERIODIC        (1 << 17)
+#define APIC_TIMER_MODE_TSC_DEADLINE    (2 << 17)
+#define APIC_TIMER_MODE_MASK            (3 << 17)
+
+static int tdt_enabled __read_mostly = 1;
+boolean_param("tdt", tdt_enabled);
 
 static struct {
     int active;
@@ -1198,6 +1206,13 @@ static void __setup_APIC_LVTT(unsigned i
     lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
     if (!APIC_INTEGRATED(ver))
         lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+    if ( tdt_enabled )
+    {
+        lvtt_value &= (~APIC_TIMER_MODE_MASK);
+        lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE;
+    }
+
     apic_write_around(APIC_LVTT, lvtt_value);
 
     tmp_value = apic_read(APIC_TDCR);
@@ -1309,7 +1324,15 @@ void __init setup_boot_APIC_clock(void)
 
     local_irq_save(flags);
 
-    calibrate_APIC_clock();
+    if ( tdt_enabled && boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
+    {
+        printk(KERN_DEBUG "TSC deadline timer enabled\n");
+    }
+    else
+    {
+        tdt_enabled = 0;
+        calibrate_APIC_clock();
+    }
 
     setup_APIC_timer();
     
@@ -1359,6 +1382,18 @@ int reprogram_timer(s_time_t timeout)
     /* No local APIC: timer list is polled via the PIT interrupt. */
     if ( !cpu_has_apic )
         return 1;
+
+    if ( tdt_enabled )
+    {
+        u64 tsc = 0;
+
+        if ( timeout )
+            tsc = stime2tsc(timeout);
+
+        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
+
+        return 1;
+    }
 
     if ( timeout && ((expire = timeout - NOW()) > 0) )
         apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX);
diff -r 0dc0bc411035 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/arch/x86/time.c	Sat Oct 30 04:28:14 2010 +0800
@@ -662,26 +662,31 @@ static void __init init_platform_timer(v
            freq_string(pts->frequency), pts->name);
 }
 
-void cstate_restore_tsc(void)
+u64 stime2tsc(s_time_t stime)
 {
     struct cpu_time *t;
     struct time_scale sys_to_tsc;
     s_time_t stime_delta;
-    u64 new_tsc;
-
+    u64 tsc;
+
+    t = &this_cpu(cpu_time);
+    sys_to_tsc = scale_reciprocal(t->tsc_scale);
+
+    stime_delta = stime - t->stime_local_stamp;
+    if ( stime_delta < 0 )
+        stime_delta = 0;
+
+    tsc = t->local_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc);
+
+    return tsc;
+}
+
+void cstate_restore_tsc(void)
+{
     if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
         return;
 
-    t = &this_cpu(cpu_time);
-    sys_to_tsc = scale_reciprocal(t->tsc_scale);
-
-    stime_delta = read_platform_stime() - t->stime_master_stamp;
-    if ( stime_delta < 0 )
-        stime_delta = 0;
-
-    new_tsc = t->local_tsc_stamp + scale_delta(stime_delta,
&sys_to_tsc);
-
-    write_tsc(new_tsc);
+    write_tsc(stime2tsc(read_platform_stime()));
 }
 
 /***************************************************************************
diff -r 0dc0bc411035 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Sat Oct 30 04:28:14 2010 +0800
@@ -99,6 +99,7 @@
 #define X86_FEATURE_SSE4_2	(4*32+20) /* Streaming SIMD Extensions 4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* Extended xAPIC */
 #define X86_FEATURE_POPCNT	(4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline
Timer */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* OSXSAVE */
 #define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running under some hypervisor */
diff -r 0dc0bc411035 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h	Sat Oct 30 04:28:14 2010 +0800
@@ -327,6 +327,8 @@
 #define MSR_IA32_MISC_ENABLE_MONITOR_ENABLE (1<<18)
 #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID  (1<<22)
 #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1<<23)
+
+#define MSR_IA32_TSC_DEADLINE		0x000006E0
 
 /* Intel Model 6 */
 #define MSR_P6_EVNTSEL0			0x00000186
diff -r 0dc0bc411035 xen/include/xen/time.h
--- a/xen/include/xen/time.h	Thu Oct 21 18:51:36 2010 +0100
+++ b/xen/include/xen/time.h	Sat Oct 30 04:28:14 2010 +0800
@@ -31,6 +31,7 @@ typedef s64 s_time_t;
 typedef s64 s_time_t;
 
 s_time_t get_s_time(void);
+u64 stime2tsc(s_time_t stime);
 unsigned long get_localtime(struct domain *d);
 
 struct tm {
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Dan Magenheimer
2010-Oct-28  22:29 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
The change to cstate_restore_tsc reminds me... TSC should never be written if it is being used to generate system time, and I think cstate_restore_tsc is only used on systems where another clocksource is used to generate system time. (And it generates a rather poor approximation of TSC.) Are there any machines where cstate_restore_tsc might get executed but TSC_Deadline is in use? If so, might there be a race condition here? Also, are there any issues with using TSC Deadline on a system with the potential for hot-add physical CPUs? I suspect cstate_restore_tsc doesn''t get executed if TSC is reliable, and the TSC Deadline feature is probably only available on systems where TSC is reliable, but am not sure. Thanks, Dan> -----Original Message----- > From: Wei, Gang [mailto:gang.wei@intel.com] > Sent: Thursday, October 28, 2010 9:14 AM > To: Keir Fraser; Tim Deegan > Cc: Brown, Len; xen-devel@lists.xensource.com; Wei, Gang; Jan Beulich > Subject: RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen > > Resend. > > Jimmy > --------------------------------------------------------- > X86: Prefer TSC-deadline timer in Xen > > The new TSC Deadline Timer offers system software a low overhead > per-logical-thread deadline timer in TSC units. > > The timer is implemented via a new architectural 64-bit register, > IA32_TSC_DEADLINE_MSR. Reads and writes of this MSR occur in program > order, > but are non-serializing. > > The support for this feature is indicated by > CPUID.01H:ECX.TSC_Deadline[bit 24] = 1 as documented in the Intel > Architectures > Software Developer''s Manual. > > The LOCAL APIC on new processors has a mode where its underlying > hardware timer > can now be accessed via the non-serializing IA32_TSC_DEADLINE_MSR in > TSC tick > units. > > If this mode is present, prefer it over the traditional LAPIC timer > mode. > KERN_DEBUG dmesg will print "TSC deadline timer enabled" when TDT is > used. > > Bootparam "tdt=off" is available to revert to LAPIC timer mode. > > This patch is based on original work by Len Brown for Linux kernel. > > cc: Len Brown <len.brown@intel.com> > Signed-off-by: Wei Gang <gang.wei@intel.com> > > diff -r 0dc0bc411035 xen/arch/x86/apic.c > --- a/xen/arch/x86/apic.c Thu Oct 21 18:51:36 2010 +0100 > +++ b/xen/arch/x86/apic.c Sat Oct 30 05:09:41 2010 +0800 > @@ -37,6 +37,14 @@ > #include <asm/asm_defns.h> /* for BUILD_SMP_INTERRUPT */ > #include <mach_apic.h> > #include <io_ports.h> > + > +#define APIC_TIMER_MODE_ONESHOT (0 << 17) > +#define APIC_TIMER_MODE_PERIODIC (1 << 17) > +#define APIC_TIMER_MODE_TSC_DEADLINE (2 << 17) > +#define APIC_TIMER_MODE_MASK (3 << 17) > + > +static int tdt_enabled __read_mostly = 1; > +boolean_param("tdt", tdt_enabled); > > static struct { > int active; > @@ -1198,6 +1206,13 @@ static void __setup_APIC_LVTT(unsigned i > lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR; > if (!APIC_INTEGRATED(ver)) > lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); > + > + if ( tdt_enabled ) > + { > + lvtt_value &= (~APIC_TIMER_MODE_MASK); > + lvtt_value |= APIC_TIMER_MODE_TSC_DEADLINE; > + } > + > apic_write_around(APIC_LVTT, lvtt_value); > > tmp_value = apic_read(APIC_TDCR); > @@ -1309,7 +1324,15 @@ void __init setup_boot_APIC_clock(void) > > local_irq_save(flags); > > - calibrate_APIC_clock(); > + if ( tdt_enabled && boot_cpu_has(X86_FEATURE_TSC_DEADLINE) ) > + { > + printk(KERN_DEBUG "TSC deadline timer enabled\n"); > + } > + else > + { > + tdt_enabled = 0; > + calibrate_APIC_clock(); > + } > > setup_APIC_timer(); > > @@ -1359,6 +1382,18 @@ int reprogram_timer(s_time_t timeout) > /* No local APIC: timer list is polled via the PIT interrupt. */ > if ( !cpu_has_apic ) > return 1; > + > + if ( tdt_enabled ) > + { > + u64 tsc = 0; > + > + if ( timeout ) > + tsc = stime2tsc(timeout); > + > + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc); > + > + return 1; > + } > > if ( timeout && ((expire = timeout - NOW()) > 0) ) > apic_tmict = min_t(u64, (bus_scale * expire) >> 18, UINT_MAX); > diff -r 0dc0bc411035 xen/arch/x86/time.c > --- a/xen/arch/x86/time.c Thu Oct 21 18:51:36 2010 +0100 > +++ b/xen/arch/x86/time.c Sat Oct 30 04:28:14 2010 +0800 > @@ -662,26 +662,31 @@ static void __init init_platform_timer(v > freq_string(pts->frequency), pts->name); > } > > -void cstate_restore_tsc(void) > +u64 stime2tsc(s_time_t stime) > { > struct cpu_time *t; > struct time_scale sys_to_tsc; > s_time_t stime_delta; > - u64 new_tsc; > - > + u64 tsc; > + > + t = &this_cpu(cpu_time); > + sys_to_tsc = scale_reciprocal(t->tsc_scale); > + > + stime_delta = stime - t->stime_local_stamp; > + if ( stime_delta < 0 ) > + stime_delta = 0; > + > + tsc = t->local_tsc_stamp + scale_delta(stime_delta, &sys_to_tsc); > + > + return tsc; > +} > + > +void cstate_restore_tsc(void) > +{ > if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ) > return; > > - t = &this_cpu(cpu_time); > - sys_to_tsc = scale_reciprocal(t->tsc_scale); > - > - stime_delta = read_platform_stime() - t->stime_master_stamp; > - if ( stime_delta < 0 ) > - stime_delta = 0; > - > - new_tsc = t->local_tsc_stamp + scale_delta(stime_delta, > &sys_to_tsc); > - > - write_tsc(new_tsc); > + write_tsc(stime2tsc(read_platform_stime())); > } > > > /********************************************************************** > ***** > diff -r 0dc0bc411035 xen/include/asm-x86/cpufeature.h > --- a/xen/include/asm-x86/cpufeature.h Thu Oct 21 18:51:36 2010 > +0100 > +++ b/xen/include/asm-x86/cpufeature.h Sat Oct 30 04:28:14 2010 > +0800 > @@ -99,6 +99,7 @@ > #define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions > 4.2 */ > #define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */ > #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ > +#define X86_FEATURE_TSC_DEADLINE (4*32+24) /* "tdt" TSC Deadline Timer > */ > #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV > */ > #define X86_FEATURE_OSXSAVE (4*32+27) /* OSXSAVE */ > #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running under some > hypervisor */ > diff -r 0dc0bc411035 xen/include/asm-x86/msr-index.h > --- a/xen/include/asm-x86/msr-index.h Thu Oct 21 18:51:36 2010 > +0100 > +++ b/xen/include/asm-x86/msr-index.h Sat Oct 30 04:28:14 2010 > +0800 > @@ -327,6 +327,8 @@ > #define MSR_IA32_MISC_ENABLE_MONITOR_ENABLE (1<<18) > #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1<<22) > #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1<<23) > + > +#define MSR_IA32_TSC_DEADLINE 0x000006E0 > > /* Intel Model 6 */ > #define MSR_P6_EVNTSEL0 0x00000186 > diff -r 0dc0bc411035 xen/include/xen/time.h > --- a/xen/include/xen/time.h Thu Oct 21 18:51:36 2010 +0100 > +++ b/xen/include/xen/time.h Sat Oct 30 04:28:14 2010 +0800 > @@ -31,6 +31,7 @@ typedef s64 s_time_t; > typedef s64 s_time_t; > > s_time_t get_s_time(void); > +u64 stime2tsc(s_time_t stime); > unsigned long get_localtime(struct domain *d); > > struct tm {_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wei, Gang
2010-Oct-29  01:00 UTC
RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen
Architecturally the TSC Deadline feature comes along with reliable TSC. For hot-add physical CPU case, if original APIC timer & TSC calibration work well, then TSC Deadline should not have problem. Jimmy> -----Original Message----- > From: Dan Magenheimer [mailto:dan.magenheimer@oracle.com] > Sent: Friday, October 29, 2010 6:29 AM > To: Wei, Gang; Keir Fraser; Tim Deegan > Cc: Brown, Len; xen-devel@lists.xensource.com; Jan Beulich > Subject: RE: [Xen-devel] [PATCH] X86: Prefer TSC-deadline timer in Xen > > The change to cstate_restore_tsc reminds me... > > TSC should never be written if it is being used to > generate system time, and I think cstate_restore_tsc > is only used on systems where another clocksource > is used to generate system time. (And it generates > a rather poor approximation of TSC.) Are there any > machines where cstate_restore_tsc might get executed > but TSC_Deadline is in use? If so, might there be > a race condition here? Also, are there any > issues with using TSC Deadline on a system with > the potential for hot-add physical CPUs? > > I suspect cstate_restore_tsc doesn''t get executed > if TSC is reliable, and the TSC Deadline feature > is probably only available on systems where TSC > is reliable, but am not sure. > > Thanks, > Dan_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel