This patchset includes several VMware guest improvements: Alexey Makhalov (3): x86/vmware: Use tsc_khz value for calibrate_cpu() x86/vmware: Add basic paravirt ops support x86/vmware: Add paravirt sched clock Documentation/kernel-parameters.txt | 4 +++ arch/x86/kernel/cpu/vmware.c | 51 +++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) -- 2.10.1
Alexey Makhalov
2016-Oct-26 05:20 UTC
[PATCH 1/3] x86/vmware: Use tsc_khz value for calibrate_cpu()
After aa297292d708, there are separate native calibrations for cpu_khz and tsc_khz. The code sets x86_platform.calibrate_cpu to native_calibrate_cpu() which looks in cpuid leaf 0x16 or msrs for the cpu frequency. Since we keep the tsc_khz constant (even after vmotion), the cpu_khz and tsc_khz may start diverging. tsc_init() now does cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; We want the cpu_khz and tsc_khz to be in sync even if they diverge by less than 10%. This patch resolves this issue by setting x86_platform.calibrate_cpu to vmware_get_tsc_khz(). Signed-off-by: Alexey Makhalov <amakhalov at vmware.com> Acked-by: Alok N Kataria <akataria at vmware.com> --- arch/x86/kernel/cpu/vmware.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 4e34da4b..480790f 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -83,6 +83,7 @@ static void __init vmware_platform_setup(void) vmware_tsc_khz = tsc_khz; x86_platform.calibrate_tsc = vmware_get_tsc_khz; + x86_platform.calibrate_cpu = vmware_get_tsc_khz; #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ -- 2.10.1
Alexey Makhalov
2016-Oct-26 05:26 UTC
[PATCH 2/3] x86/vmware: Add basic paravirt ops support
Add basic paravirt support:
1. set pv_info.name to "VMware" to have proper boot log message
Booting paravirtualized kernel on VMware
instead of "... on bare hardware"
2. set pv_cpu_ops.io_delay() to empty function - paravirt_nop() to
avoid vm-exits on IO delays.
Signed-off-by: Alexey Makhalov <amakhalov at vmware.com>
Acked-by: Alok N Kataria <akataria at vmware.com>
---
arch/x86/kernel/cpu/vmware.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 480790f..e3fb320 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -61,6 +61,16 @@ static unsigned long vmware_get_tsc_khz(void)
return vmware_tsc_khz;
}
+#ifdef CONFIG_PARAVIRT
+static void __init vmware_paravirt_ops_setup(void)
+{
+ pv_info.name = "VMware";
+ pv_cpu_ops.io_delay = paravirt_nop;
+}
+#else
+#define vmware_paravirt_ops_setup() do {} while (0)
+#endif
+
static void __init vmware_platform_setup(void)
{
uint32_t eax, ebx, ecx, edx;
@@ -94,6 +104,8 @@ static void __init vmware_platform_setup(void)
} else {
pr_warn("Failed to get TSC freq from the hypervisor\n");
}
+
+ vmware_paravirt_ops_setup();
}
/*
--
2.10.1
[PATCH 3/3] x86/vmware: Add paravirt sched clock
Set pv_time_ops.sched_clock to vmware_sched_clock(). It is a simplified
version of native_sched_clock() without the ring buffer of mult/shift/offset
triplets and preempt toggling.
Since VMware hypervisor provides constant tsc we can use constant
mult/shift/offset triplet calculated at boot time.
no-vmw-sched-clock kernel parameter is added to switch back to the
native_sched_clock() implementation.
Signed-off-by: Alexey Makhalov <amakhalov at vmware.com>
Acked-by: Alok N Kataria <akataria at vmware.com>
---
Documentation/kernel-parameters.txt | 4 ++++
arch/x86/kernel/cpu/vmware.c | 38 +++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 37babf9..b3b2ec0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2754,6 +2754,10 @@ bytes respectively. Such letter suffixes can also be
entirely omitted.
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
fault handling.
+ no-vmw-sched-clock
+ [X86,PV_OPS] Disable paravirtualized VMware scheduler
+ clock and use the default one.
+
no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
steal time is computed, but won't influence scheduler
behaviour
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index e3fb320..6ef22c1 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -24,10 +24,12 @@
#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/clocksource.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/apic.h>
+#include <asm/timer.h>
#define CPUID_VMWARE_INFO_LEAF 0x40000000
#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
@@ -62,10 +64,46 @@ static unsigned long vmware_get_tsc_khz(void)
}
#ifdef CONFIG_PARAVIRT
+static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
+
+static int vmw_sched_clock __initdata = 1;
+static __init int setup_vmw_sched_clock(char *s)
+{
+ vmw_sched_clock = 0;
+ return 0;
+}
+early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
+
+static unsigned long long vmware_sched_clock(void)
+{
+ unsigned long long ns;
+
+ ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
+ vmware_cyc2ns.cyc2ns_shift);
+ ns -= vmware_cyc2ns.cyc2ns_offset;
+ return ns;
+}
+
static void __init vmware_paravirt_ops_setup(void)
{
pv_info.name = "VMware";
pv_cpu_ops.io_delay = paravirt_nop;
+
+ if (vmware_tsc_khz && vmw_sched_clock) {
+ unsigned long long tsc_now = rdtsc();
+
+ clocks_calc_mult_shift(&vmware_cyc2ns.cyc2ns_mul,
+ &vmware_cyc2ns.cyc2ns_shift,
+ vmware_tsc_khz,
+ NSEC_PER_MSEC, 0);
+ vmware_cyc2ns.cyc2ns_offset =
+ mul_u64_u32_shr(tsc_now, vmware_cyc2ns.cyc2ns_mul,
+ vmware_cyc2ns.cyc2ns_shift);
+
+ pv_time_ops.sched_clock = vmware_sched_clock;
+ pr_info("vmware: using sched offset of %llu ns\n",
+ vmware_cyc2ns.cyc2ns_offset);
+ }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
--
2.10.1
On Tue, 25 Oct 2016, Alexey Makhalov wrote:

> no-vmw-sched-clock kernel parameter is added to switch back to the
> native_sched_clock() implementation.

You are not switching back. The parameter is used to disable the paravirt sched clock.

> #ifdef CONFIG_PARAVIRT
> +static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
> +
> +static int vmw_sched_clock __initdata = 1;
> +static __init int setup_vmw_sched_clock(char *s)

Please stop glueing a variable to a function w/o a new line between them. It's just stopping the reading flow.

> +{
> + vmw_sched_clock = 0;
> + return 0;
> +}
> +early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
> +
> +static unsigned long long vmware_sched_clock(void)
> +{
> + unsigned long long ns;
> +
> + ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
> + vmware_cyc2ns.cyc2ns_shift);
> + ns -= vmware_cyc2ns.cyc2ns_offset;
> + return ns;
> +}
> +
> static void __init vmware_paravirt_ops_setup(void)
> {
> pv_info.name = "VMware";
> pv_cpu_ops.io_delay = paravirt_nop;
> +
> + if (vmware_tsc_khz && vmw_sched_clock) {
> + unsigned long long tsc_now = rdtsc();
> +
> + clocks_calc_mult_shift(&vmware_cyc2ns.cyc2ns_mul,
> + &vmware_cyc2ns.cyc2ns_shift,
> + vmware_tsc_khz,
> + NSEC_PER_MSEC, 0);
> + vmware_cyc2ns.cyc2ns_offset =
> + mul_u64_u32_shr(tsc_now, vmware_cyc2ns.cyc2ns_mul,
> + vmware_cyc2ns.cyc2ns_shift);
> +
> + pv_time_ops.sched_clock = vmware_sched_clock;
> + pr_info("vmware: using sched offset of %llu ns\n",

Please use pr_fmt instead of adding the prefix to every print.

Thanks,
tglx
I believe our trademark guidelines say we aren't supposed to use VMware as a noun to mean a product, only to mean the company. So we can say "running on VMware ESXi" or "running in a VMware virtual machine", but "running on VMware" is wrong. There is supposedly some good legal reason for this related to keeping our trademark.

On Tue, 25 Oct 2016 22:26:00 -0700, Alexey Makhalov <amakhalov at vmware.com> wrote:

> Add basic paravirt support:
> 1. set pv_info.name to "VMware" to have proper boot log message
> Booting paravirtualized kernel on VMware
> instead of "... on bare hardware"
> 2. set pv_cpu_ops.io_delay() to empty function - paravirt_nop() to
> avoid vm-exits on IO delays.
>
> Signed-off-by: Alexey Makhalov <amakhalov at vmware.com>
> Acked-by: Alok N Kataria <akataria at vmware.com>
> ---
> arch/x86/kernel/cpu/vmware.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
> index 480790f..e3fb320 100644
> --- a/arch/x86/kernel/cpu/vmware.c
> +++ b/arch/x86/kernel/cpu/vmware.c
> @@ -61,6 +61,16 @@ static unsigned long vmware_get_tsc_khz(void)
> return vmware_tsc_khz;
> }
>
> +#ifdef CONFIG_PARAVIRT
> +static void __init vmware_paravirt_ops_setup(void)
> +{
> + pv_info.name = "VMware";
> + pv_cpu_ops.io_delay = paravirt_nop;
> +}
> +#else
> +#define vmware_paravirt_ops_setup() do {} while (0)
> +#endif
> +
> static void __init vmware_platform_setup(void)
> {
> uint32_t eax, ebx, ecx, edx;
> @@ -94,6 +104,8 @@ static void __init vmware_platform_setup(void)
> } else {
> pr_warn("Failed to get TSC freq from the hypervisor\n");
> }
> +
> + vmware_paravirt_ops_setup();
> }
>
> /*

--
Tim Mann | work: mann at vmware.com home: tim at tim-mann.org VMware Sr. Staff Engineer | http://www.vmware.com http://tim-mann.org
Seemingly Similar Threads
- [PATCH 0/3] x86/vmware guest improvements
- [RESEND PATCH 1/3] x86/vmware: Use tsc_khz value for calibrate_cpu()
- [RESEND PATCH 1/3] x86/vmware: Use tsc_khz value for calibrate_cpu()
- [PATCH v3 0/3] x86/vmware guest improvements
- [PATCH v3 0/3] x86/vmware guest improvements