Vitaly Kuznetsov
2017-Feb-09 14:10 UTC
[PATCH 0/2] x86/vdso: Add Hyper-V TSC page clocksource support
Hi, Hyper-V TSC page clocksource is suitable for vDSO, however, the protocol defined by the hypervisor is different from VCLOCK_PVCLOCK. Implemented the required support. Simple sysbench test shows the following results: Before: # time sysbench --test=memory --max-requests=500000 run ... real 1m22.618s user 0m50.193s sys 0m32.268s After: # time sysbench --test=memory --max-requests=500000 run ... real 0m47.241s user 0m47.117s sys 0m0.008s So it seems it is worth it. As nobody seems to be strongly offended by my RFC I'm sending first non-RFC version. Patch 1 is made on top of K. Y.'s code refactoring which moved tsc page clocksource to arch/x86/hyperv/hv_init.c, this is currently present in Greg's char-misc-next tree. Changes since RFC: - Use mul_u64_u64_shr() instead of an open coded implementation [Andy Lutomirski, Thomas Gleixner] - Don't use the same pvclock_page for both VCLOCK_PVCLOCK and VCLOCK_HVCLOCK, create another one [Andy Lutomirski] - Fix issues reported by kbuild test robot. - Rename HYPERV_CLOCK -> HYPERV_TSCPAGE to avoid the ambiguity. I'm also going to try to optimize mul_u64_u64_shr() for 32bit but this can be split from this series I guess. Vitaly Kuznetsov (2): hyperv: implement hv_get_tsc_page() x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method arch/x86/entry/vdso/vclock_gettime.c | 36 +++++++++++++++++++++++++++++++++++ arch/x86/entry/vdso/vdso-layout.lds.S | 3 ++- arch/x86/entry/vdso/vdso2c.c | 3 +++ arch/x86/entry/vdso/vma.c | 7 +++++++ arch/x86/hyperv/hv_init.c | 12 ++++++++++-- arch/x86/include/asm/clocksource.h | 3 ++- arch/x86/include/asm/mshyperv.h | 8 ++++++++ arch/x86/include/asm/vdso.h | 1 + drivers/hv/Kconfig | 3 +++ 9 files changed, 72 insertions(+), 4 deletions(-) -- 2.9.3
To use Hyper-V TSC page clocksource from vDSO we need to make tsc_pg available. Implement hv_get_tsc_page() and add CONFIG_HYPERV_TSCPAGE to make #ifdef-s simple. Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com> --- arch/x86/hyperv/hv_init.c | 9 +++++++-- arch/x86/include/asm/mshyperv.h | 8 ++++++++ drivers/hv/Kconfig | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b371d0e..0ce8485 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -27,10 +27,15 @@ #include <linux/clockchips.h> -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HYPERV_TSCPAGE static struct ms_hyperv_tsc_page *tsc_pg; +struct ms_hyperv_tsc_page *hv_get_tsc_page(void) +{ + return tsc_pg; +} + static u64 read_hv_clock_tsc(struct clocksource *arg) { u64 current_tick; @@ -136,7 +141,7 @@ void hyperv_init(void) /* * Register Hyper-V specific clocksource. */ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HYPERV_TSCPAGE if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { union hv_x64_msr_hypercall_contents tsc_msr; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f8dc370..14dd92c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -173,4 +173,12 @@ void hyperv_report_panic(struct pt_regs *regs); bool hv_is_hypercall_page_setup(void); void hyperv_cleanup(void); #endif +#ifdef CONFIG_HYPERV_TSCPAGE +struct ms_hyperv_tsc_page *hv_get_tsc_page(void); +#else +static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) +{ + return NULL; +} +#endif #endif diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 0403b51..c29cd53 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -7,6 +7,9 @@ config HYPERV Select this option to run Linux as a Hyper-V client operating system. 
+config HYPERV_TSCPAGE + def_bool HYPERV && X86_64 + config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" depends on HYPERV && CONNECTOR && NLS -- 2.9.3
Vitaly Kuznetsov
2017-Feb-09 14:10 UTC
[PATCH 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
Hyper-V TSC page clocksource is suitable for vDSO, however, the protocol defined by the hypervisor is different from VCLOCK_PVCLOCK. Implement the required support by adding hvclock_page VVAR. Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com> --- arch/x86/entry/vdso/vclock_gettime.c | 36 +++++++++++++++++++++++++++++++++++ arch/x86/entry/vdso/vdso-layout.lds.S | 3 ++- arch/x86/entry/vdso/vdso2c.c | 3 +++ arch/x86/entry/vdso/vma.c | 7 +++++++ arch/x86/hyperv/hv_init.c | 3 +++ arch/x86/include/asm/clocksource.h | 3 ++- arch/x86/include/asm/vdso.h | 1 + 7 files changed, 54 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 9d4d6e1..4af10b4 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -17,6 +17,7 @@ #include <asm/unistd.h> #include <asm/msr.h> #include <asm/pvclock.h> +#include <asm/mshyperv.h> #include <linux/math64.h> #include <linux/time.h> #include <linux/kernel.h> @@ -32,6 +33,11 @@ extern u8 pvclock_page __attribute__((visibility("hidden"))); #endif +#ifdef CONFIG_HYPERV_TSCPAGE +extern u8 hvclock_page + __attribute__((visibility("hidden"))); +#endif + #ifndef BUILD_VDSO32 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) @@ -141,6 +147,32 @@ static notrace u64 vread_pvclock(int *mode) return last; } #endif +#ifdef CONFIG_HYPERV_TSCPAGE +static notrace u64 vread_hvclock(int *mode) +{ + const struct ms_hyperv_tsc_page *tsc_pg = + (const struct ms_hyperv_tsc_page *)&hvclock_page; + u64 sequence, scale, offset, current_tick, cur_tsc; + + while (1) { + sequence = READ_ONCE(tsc_pg->tsc_sequence); + if (!sequence) + break; + + scale = READ_ONCE(tsc_pg->tsc_scale); + offset = READ_ONCE(tsc_pg->tsc_offset); + rdtscll(cur_tsc); + + current_tick = mul_u64_u64_shr(cur_tsc, scale, 64) + offset; + + if (READ_ONCE(tsc_pg->tsc_sequence) == sequence) + return current_tick; + } + + *mode = VCLOCK_NONE; + return 0; +} +#endif 
notrace static u64 vread_tsc(void) { @@ -173,6 +205,10 @@ notrace static inline u64 vgetsns(int *mode) else if (gtod->vclock_mode == VCLOCK_PVCLOCK) cycles = vread_pvclock(mode); #endif +#ifdef CONFIG_HYPERV_TSCPAGE + else if (gtod->vclock_mode == VCLOCK_HVCLOCK) + cycles = vread_hvclock(mode); +#endif else return 0; v = (cycles - gtod->cycle_last) & gtod->mask; diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index a708aa9..8ebb4b6 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -25,7 +25,7 @@ SECTIONS * segment. */ - vvar_start = . - 2 * PAGE_SIZE; + vvar_start = . - 3 * PAGE_SIZE; vvar_page = vvar_start; /* Place all vvars at the offsets in asm/vvar.h. */ @@ -36,6 +36,7 @@ SECTIONS #undef EMIT_VVAR pvclock_page = vvar_start + PAGE_SIZE; + hvclock_page = vvar_start + 2 * PAGE_SIZE; . = SIZEOF_HEADERS; diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 491020b..0780a44 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -74,6 +74,7 @@ enum { sym_vvar_page, sym_hpet_page, sym_pvclock_page, + sym_hvclock_page, sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_END, }; @@ -82,6 +83,7 @@ const int special_pages[] = { sym_vvar_page, sym_hpet_page, sym_pvclock_page, + sym_hvclock_page, }; struct vdso_sym { @@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = { [sym_vvar_page] = {"vvar_page", true}, [sym_hpet_page] = {"hpet_page", true}, [sym_pvclock_page] = {"pvclock_page", true}, + [sym_hvclock_page] = {"hvclock_page", true}, [sym_VDSO_FAKE_SECTION_TABLE_START] = { "VDSO_FAKE_SECTION_TABLE_START", false }, diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 10820f6..b256a3b 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -21,6 +21,7 @@ #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> +#include <asm/mshyperv.h> #if defined(CONFIG_X86_64) 
unsigned int __read_mostly vdso64_enabled = 1; @@ -120,6 +121,12 @@ static int vvar_fault(const struct vm_special_mapping *sm, vmf->address, __pa(pvti) >> PAGE_SHIFT); } + } else if (sym_offset == image->sym_hvclock_page) { + struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); + + if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) + ret = vm_insert_pfn(vma, vmf->address, + vmalloc_to_pfn(tsc_pg)); } if (ret == 0 || ret == -EBUSY) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0ce8485..17519e0 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -157,6 +157,9 @@ void hyperv_init(void) tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + + hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index eae33c7..47bea8c 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -6,7 +6,8 @@ #define VCLOCK_NONE 0 /* No vDSO clock available. */ #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ #define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ -#define VCLOCK_MAX 2 +#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */ +#define VCLOCK_MAX 3 struct arch_clocksource_data { int vclock_mode; diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 2444189..bccdf49 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -20,6 +20,7 @@ struct vdso_image { long sym_vvar_page; long sym_hpet_page; long sym_pvclock_page; + long sym_hvclock_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; -- 2.9.3
Thomas Gleixner
2017-Feb-09 17:08 UTC
[PATCH 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
On Thu, 9 Feb 2017, Vitaly Kuznetsov wrote: > +#ifdef CONFIG_HYPERV_TSCPAGE > +static notrace u64 vread_hvclock(int *mode) > +{ > + const struct ms_hyperv_tsc_page *tsc_pg = > + (const struct ms_hyperv_tsc_page *)&hvclock_page; > + u64 sequence, scale, offset, current_tick, cur_tsc; > + > + while (1) { > + sequence = READ_ONCE(tsc_pg->tsc_sequence); > + if (!sequence) > + break; > + > + scale = READ_ONCE(tsc_pg->tsc_scale); > + offset = READ_ONCE(tsc_pg->tsc_offset); > + rdtscll(cur_tsc); > + > + current_tick = mul_u64_u64_shr(cur_tsc, scale, 64) + offset; > + > + if (READ_ONCE(tsc_pg->tsc_sequence) == sequence) > + return current_tick; That sequence stuff still lacks a sensible explanation. It's fundamentally different from the sequence counting we do in the kernel, so documentation for it is really required. Thanks, tglx
The actual code looks fine, but the style police will not like you. { should be at start of line on functions. And #else should be at start of line. But maybe this was just more of Exchange mangling the mail. -----Original Message----- From: Vitaly Kuznetsov [mailto:vkuznets at redhat.com] Sent: Thursday, February 9, 2017 6:11 AM To: x86 at kernel.org; Andy Lutomirski <luto at amacapital.net> Cc: Thomas Gleixner <tglx at linutronix.de>; Ingo Molnar <mingo at redhat.com>; H. Peter Anvin <hpa at zytor.com>; KY Srinivasan <kys at microsoft.com>; Haiyang Zhang <haiyangz at microsoft.com>; Stephen Hemminger <sthemmin at microsoft.com>; Dexuan Cui <decui at microsoft.com>; linux-kernel at vger.kernel.org; devel at linuxdriverproject.org; virtualization at lists.linux-foundation.org Subject: [PATCH 1/2] hyperv: implement hv_get_tsc_page() To use Hyper-V TSC page clocksource from vDSO we need to make tsc_pg available. Implement hv_get_tsc_page() and add CONFIG_HYPERV_TSCPAGE to make #ifdef-s simple. Signed-off-by: Vitaly Kuznetsov <vkuznets at redhat.com> --- arch/x86/hyperv/hv_init.c | 9 +++++++-- arch/x86/include/asm/mshyperv.h | 8 ++++++++ drivers/hv/Kconfig | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b371d0e..0ce8485 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -27,10 +27,15 @@ #include <linux/clockchips.h> -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HYPERV_TSCPAGE static struct ms_hyperv_tsc_page *tsc_pg; +struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { + return tsc_pg; +} + static u64 read_hv_clock_tsc(struct clocksource *arg) { u64 current_tick; @@ -136,7 +141,7 @@ void hyperv_init(void) /* * Register Hyper-V specific clocksource. 
*/ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HYPERV_TSCPAGE if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { union hv_x64_msr_hypercall_contents tsc_msr; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f8dc370..14dd92c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -173,4 +173,12 @@ void hyperv_report_panic(struct pt_regs *regs); bool hv_is_hypercall_page_setup(void); void hyperv_cleanup(void); #endif +#ifdef CONFIG_HYPERV_TSCPAGE +struct ms_hyperv_tsc_page *hv_get_tsc_page(void); #else static inline +struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { + return NULL; +} +#endif #endif diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 0403b51..c29cd53 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -7,6 +7,9 @@ config HYPERV Select this option to run Linux as a Hyper-V client operating system. +config HYPERV_TSCPAGE + def_bool HYPERV && X86_64 + config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" depends on HYPERV && CONNECTOR && NLS -- 2.9.3
Possibly Parallel Threads
- [PATCH 1/2] hyperv: implement hv_get_tsc_page()
- [PATCH 1/2] hyperv: implement hv_get_tsc_page()
- [PATCH v3 2/3] x86/hyperv: move TSC reading method to asm/mshyperv.h
- [PATCH 2/2] x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
- [PATCH v3 2/3] x86/hyperv: move TSC reading method to asm/mshyperv.h