Vitaly Kuznetsov
2017-Feb-08  17:07 UTC
[PATCH RFC 0/2] x86/vdso: Add Hyper-V TSC page clocksource support
Hi, Hyper-V TSC page clocksource is suitable for vDSO, however, the protocol defined by the hypervisor is different from VCLOCK_PVCLOCK. I implemented the required support re-using pvclock_page VVAR. Simple sysbench test shows the following results: Before: # time sysbench --test=memory --max-requests=500000 run ... real 1m22.618s user 0m50.193s sys 0m32.268s After: # time sysbench --test=memory --max-requests=500000 run ... real 0m50.218s user 0m50.171s sys 0m0.016s So it seems it is worth it. What do you think? Vitaly Kuznetsov (2): hyperv: implement hv_get_tsc_page() x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method arch/x86/entry/vdso/vclock_gettime.c | 48 ++++++++++++++++++++++++++++++++++++ arch/x86/entry/vdso/vma.c | 26 +++++++++++++------ arch/x86/hyperv/hv_init.c | 8 ++++++ arch/x86/include/asm/clocksource.h | 3 ++- arch/x86/include/asm/mshyperv.h | 8 ++++++ drivers/hv/Kconfig | 5 ++++ 6 files changed, 90 insertions(+), 8 deletions(-) -- 2.9.3
Vitaly Kuznetsov
2017-Feb-08  17:07 UTC
[PATCH RFC 1/2] hyperv: implement hv_get_tsc_page()
To use Hyper-V TSC page clocksource from vDSO we need to make tsc_pg
available. Implement hv_get_tsc_page() and add CONFIG_HYPERV_CLOCK to
make #ifdef-s simple.
Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com>
---
 arch/x86/hyperv/hv_init.c       | 5 +++++
 arch/x86/include/asm/mshyperv.h | 8 ++++++++
 drivers/hv/Kconfig              | 5 +++++
 3 files changed, 18 insertions(+)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index b371d0e..aa36049 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -31,6 +31,11 @@
 
 static struct ms_hyperv_tsc_page *tsc_pg;
 
+struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+	return tsc_pg;
+}
+
 static u64 read_hv_clock_tsc(struct clocksource *arg)
 {
 	u64 current_tick;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f8dc370..005d0bc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -173,4 +173,12 @@ void hyperv_report_panic(struct pt_regs *regs);
 bool hv_is_hypercall_page_setup(void);
 void hyperv_cleanup(void);
 #endif
+#ifdef CONFIG_HYPERV_CLOCK
+struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+#else
+static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+	return NULL;
+}
+#endif
 #endif
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 0403b51..3b45e69 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -3,10 +3,15 @@ menu "Microsoft Hyper-V guest support"
 config HYPERV
 	tristate "Microsoft Hyper-V client drivers"
 	depends on X86 && ACPI && PCI && X86_LOCAL_APIC
&& HYPERVISOR_GUEST
+	select HYPERV_CLOCK
 	help
 	  Select this option to run Linux as a Hyper-V client operating
 	  system.
 
+config HYPERV_CLOCK
+       bool
+       depends on X86_64
+
 config HYPERV_UTILS
 	tristate "Microsoft Hyper-V Utilities driver"
 	depends on HYPERV && CONNECTOR && NLS
-- 
2.9.3
Vitaly Kuznetsov
2017-Feb-08  17:07 UTC
[PATCH RFC 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
Hyper-V TSC page clocksource is suitable for vDSO, however, the protocol
defined by the hypervisor is different from VCLOCK_PVCLOCK. Implement the
required support re-using pvclock_page VVAR as VCLOCK_PVCLOCK is mutually
exclusive with VCLOCK_HVCLOCK at run time.
Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com>
---
 arch/x86/entry/vdso/vclock_gettime.c | 48 ++++++++++++++++++++++++++++++++++++
 arch/x86/entry/vdso/vma.c            | 26 +++++++++++++------
 arch/x86/hyperv/hv_init.c            |  3 +++
 arch/x86/include/asm/clocksource.h   |  3 ++-
 4 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c
b/arch/x86/entry/vdso/vclock_gettime.c
index 9d4d6e1..93e9dcd 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -17,6 +17,7 @@
 #include <asm/unistd.h>
 #include <asm/msr.h>
 #include <asm/pvclock.h>
+#include <asm/mshyperv.h>
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
@@ -141,6 +142,49 @@ static notrace u64 vread_pvclock(int *mode)
 	return last;
 }
 #endif
+#ifdef CONFIG_HYPERV_CLOCK
+/* (a * b) >> 64 implementation */
+static inline u64 mul64x64_hi(u64 a, u64 b)
+{
+	u64 a_lo, a_hi, b_lo, b_hi, p1, p2;
+
+	a_lo = (u32)a;
+	a_hi = a >> 32;
+	b_lo = (u32)b;
+	b_hi = b >> 32;
+	p1 = a_lo * b_hi;
+	p2 = a_hi * b_lo;
+
+	return a_hi * b_hi + (p1 >> 32) + (p2 >> 32) +
+		((((a_lo * b_lo) >> 32) + (u32)p1 + (u32)p2) >> 32);
+
+}
+
+static notrace u64 vread_hvclock(int *mode)
+{
+	const struct ms_hyperv_tsc_page *tsc_pg +		(const struct ms_hyperv_tsc_page
*)&pvclock_page;
+	u64 sequence, scale, offset, current_tick, cur_tsc;
+
+	while (1) {
+		sequence = READ_ONCE(tsc_pg->tsc_sequence);
+		if (!sequence)
+			break;
+
+		scale = READ_ONCE(tsc_pg->tsc_scale);
+		offset = READ_ONCE(tsc_pg->tsc_offset);
+		rdtscll(cur_tsc);
+
+		current_tick = mul64x64_hi(cur_tsc, scale) + offset;
+
+		if (READ_ONCE(tsc_pg->tsc_sequence) == sequence)
+			return current_tick;
+	}
+
+	*mode = VCLOCK_NONE;
+	return 0;
+}
+#endif
 
 notrace static u64 vread_tsc(void)
 {
@@ -173,6 +217,10 @@ notrace static inline u64 vgetsns(int *mode)
 	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
 		cycles = vread_pvclock(mode);
 #endif
+#ifdef CONFIG_HYPERV_CLOCK
+	else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
+		cycles = vread_hvclock(mode);
+#endif
 	else
 		return 0;
 	v = (cycles - gtod->cycle_last) & gtod->mask;
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6..4b9d90c 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -21,6 +21,7 @@
 #include <asm/page.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
+#include <asm/mshyperv.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -112,13 +113,24 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		ret = vm_insert_pfn(vma, vmf->address,
 				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
 	} else if (sym_offset == image->sym_pvclock_page) {
-		struct pvclock_vsyscall_time_info *pvti -			pvclock_pvti_cpu0_va();
-		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
-			ret = vm_insert_pfn(
-				vma,
-				vmf->address,
-				__pa(pvti) >> PAGE_SHIFT);
+		if (vclock_was_used(VCLOCK_PVCLOCK)) {
+			struct pvclock_vsyscall_time_info *pvti +				pvclock_pvti_cpu0_va();
+			if (pvti) {
+				ret = vm_insert_pfn(
+					vma,
+					vmf->address,
+					__pa(pvti) >> PAGE_SHIFT);
+			}
+		} else if (vclock_was_used(VCLOCK_HVCLOCK)) {
+			struct ms_hyperv_tsc_page *tsc_pg +				hv_get_tsc_page();
+			if (tsc_pg) {
+				ret = vm_insert_pfn(
+					vma,
+					vmf->address,
+					vmalloc_to_pfn(tsc_pg));
+			}
 		}
 	}
 
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index aa36049..3d534d2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -157,6 +157,9 @@ void hyperv_init(void)
 		tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
 
 		wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
+
+		hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
+
 		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 		return;
 	}
diff --git a/arch/x86/include/asm/clocksource.h
b/arch/x86/include/asm/clocksource.h
index eae33c7..47bea8c 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -6,7 +6,8 @@
 #define VCLOCK_NONE	0	/* No vDSO clock available.		*/
 #define VCLOCK_TSC	1	/* vDSO should use vread_tsc.		*/
 #define VCLOCK_PVCLOCK	2	/* vDSO should use vread_pvclock.	*/
-#define VCLOCK_MAX	2
+#define VCLOCK_HVCLOCK	3	/* vDSO should use vread_hvclock.	*/
+#define VCLOCK_MAX	3
 
 struct arch_clocksource_data {
 	int vclock_mode;
-- 
2.9.3
Andy Lutomirski
2017-Feb-08  17:12 UTC
[PATCH RFC 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
On Wed, Feb 8, 2017 at 9:07 AM, Vitaly Kuznetsov <vkuznets at redhat.com> wrote:> Hyper-V TSC page clocksource is suitable for vDSO, however, the protocol > defined by the hypervisor is different from VCLOCK_PVCLOCK. Implement the > required support re-using pvclock_page VVAR as VCLOCK_PVCLOCK is mutually > exclusive with VCLOCK_HVCLOCK at run time. > > Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com> > --- > arch/x86/entry/vdso/vclock_gettime.c | 48 ++++++++++++++++++++++++++++++++++++ > arch/x86/entry/vdso/vma.c | 26 +++++++++++++------ > arch/x86/hyperv/hv_init.c | 3 +++ > arch/x86/include/asm/clocksource.h | 3 ++- > 4 files changed, 72 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c > index 9d4d6e1..93e9dcd 100644 > --- a/arch/x86/entry/vdso/vclock_gettime.c > +++ b/arch/x86/entry/vdso/vclock_gettime.c > @@ -17,6 +17,7 @@ > #include <asm/unistd.h> > #include <asm/msr.h> > #include <asm/pvclock.h> > +#include <asm/mshyperv.h> > #include <linux/math64.h> > #include <linux/time.h> > #include <linux/kernel.h> > @@ -141,6 +142,49 @@ static notrace u64 vread_pvclock(int *mode) > return last; > } > #endif > +#ifdef CONFIG_HYPERV_CLOCK > +/* (a * b) >> 64 implementation */ > +static inline u64 mul64x64_hi(u64 a, u64 b) > +{ > + u64 a_lo, a_hi, b_lo, b_hi, p1, p2; > + > + a_lo = (u32)a; > + a_hi = a >> 32; > + b_lo = (u32)b; > + b_hi = b >> 32; > + p1 = a_lo * b_hi; > + p2 = a_hi * b_lo; > + > + return a_hi * b_hi + (p1 >> 32) + (p2 >> 32) + > + ((((a_lo * b_lo) >> 32) + (u32)p1 + (u32)p2) >> 32); > + > +}Unless GCC is waaay more clever than I think, this is hugely suboptimal on 64-bit. x86 can do this in a single instruction, and gcc can express it cleanly using __uint128_t. I wouldn't be terribly surprised if the 32-bit generated code was fine, too.> + > +static notrace u64 vread_hvclock(int *mode) > +{ > + const struct ms_hyperv_tsc_page *tsc_pg > + (const struct ms_hyperv_tsc_page *)&pvclock_page; > + u64 sequence, scale, offset, current_tick, cur_tsc; > + > + while (1) { > + sequence = READ_ONCE(tsc_pg->tsc_sequence); > + if (!sequence) > + break; > + > + scale = READ_ONCE(tsc_pg->tsc_scale); > + offset = READ_ONCE(tsc_pg->tsc_offset); > + rdtscll(cur_tsc); > + > + current_tick = mul64x64_hi(cur_tsc, scale) + offset; > + > + if (READ_ONCE(tsc_pg->tsc_sequence) == sequence) > + return current_tick; > + }Can you explain better what's going on here? What protocol does the hypervisor use to update this bizarro seqlock? What exactly does sequence==0 mean?> + > + *mode = VCLOCK_NONE; > + return 0; > +} > +#endif > > notrace static u64 vread_tsc(void) > { > @@ -173,6 +217,10 @@ notrace static inline u64 vgetsns(int *mode) > else if (gtod->vclock_mode == VCLOCK_PVCLOCK) > cycles = vread_pvclock(mode); > #endif > +#ifdef CONFIG_HYPERV_CLOCK > + else if (gtod->vclock_mode == VCLOCK_HVCLOCK) > + cycles = vread_hvclock(mode); > +#endif > else > return 0; > v = (cycles - gtod->cycle_last) & gtod->mask; > diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c > index 10820f6..4b9d90c 100644 > --- a/arch/x86/entry/vdso/vma.c > +++ b/arch/x86/entry/vdso/vma.c > @@ -21,6 +21,7 @@ > #include <asm/page.h> > #include <asm/desc.h> > #include <asm/cpufeature.h> > +#include <asm/mshyperv.h> > > #if defined(CONFIG_X86_64) > unsigned int __read_mostly vdso64_enabled = 1; > @@ -112,13 +113,24 @@ static int vvar_fault(const struct vm_special_mapping *sm, > ret = vm_insert_pfn(vma, vmf->address, > __pa_symbol(&__vvar_page) >> PAGE_SHIFT); > } else if (sym_offset == image->sym_pvclock_page) { > - struct pvclock_vsyscall_time_info *pvti > - pvclock_pvti_cpu0_va(); > - if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { > - ret = vm_insert_pfn( > - vma, > - vmf->address, > - __pa(pvti) >> PAGE_SHIFT); > + if (vclock_was_used(VCLOCK_PVCLOCK)) { > + struct pvclock_vsyscall_time_info *pvti > + pvclock_pvti_cpu0_va(); > + if (pvti) { > + ret = vm_insert_pfn( > + vma, > + vmf->address, > + __pa(pvti) >> PAGE_SHIFT); > + } > + } else if (vclock_was_used(VCLOCK_HVCLOCK)) { > + struct ms_hyperv_tsc_page *tsc_pg > + hv_get_tsc_page(); > + if (tsc_pg) { > + ret = vm_insert_pfn( > + vma, > + vmf->address, > + vmalloc_to_pfn(tsc_pg)); > + }This is IMO just too weird and fragile. Why not allocate another page and avoid this strange aliasing? --Andy
Seemingly Similar Threads
- [PATCH RFC 0/2] x86/vdso: Add Hyper-V TSC page clocksource support
- [PATCH RFC 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
- [PATCH RFC 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
- [PATCH RFC 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
- [PATCH 0/2] x86/vdso: Add Hyper-V TSC page clocksource support