With virt_spin_lock() being a pvops function the bare metal case can be optimized by patching the call away completely. In case a kernel running as a guest it can decide whether to use paravitualized spinlocks, the current fallback to the unfair test-and-set scheme, or to mimic the bare metal behavior. Juergen Gross (4): paravirt: add generic _paravirt_false() function paravirt: switch vcpu_is_preempted to use _paravirt_false() on bare metal paravirt: add virt_spin_lock pvops function paravirt,xen: correct xen_nopvspin case arch/x86/include/asm/paravirt.h | 5 ++++ arch/x86/include/asm/paravirt_types.h | 3 +++ arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- arch/x86/kernel/paravirt-spinlocks.c | 22 ++++++++-------- arch/x86/kernel/paravirt.c | 7 +++++ arch/x86/kernel/paravirt_patch_32.c | 18 ++++++------- arch/x86/kernel/paravirt_patch_64.c | 17 +++++-------- arch/x86/kernel/smpboot.c | 2 ++ arch/x86/xen/spinlock.c | 2 ++ 9 files changed, 79 insertions(+), 45 deletions(-) -- 2.12.3
Juergen Gross
2017-Sep-05 13:24 UTC
[PATCH 1/4] paravirt: add generic _paravirt_false() function
Add a _paravirt_false() default function returning always false which can be used for cases where a boolean pvops replacement should just say "no". Signed-off-by: Juergen Gross <jgross at suse.com> --- arch/x86/include/asm/paravirt_types.h | 2 ++ arch/x86/kernel/paravirt.c | 7 +++++++ arch/x86/kernel/paravirt_patch_32.c | 8 ++++++++ arch/x86/kernel/paravirt_patch_64.c | 7 +++++++ 4 files changed, 24 insertions(+) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 6b64fc6367f2..19efefc0e27e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -377,6 +377,7 @@ extern struct pv_lock_ops pv_lock_ops; unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); +unsigned paravirt_patch_false(void *insnbuf, unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, unsigned long addr, u16 site_clobbers, @@ -682,6 +683,7 @@ void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); u32 _paravirt_ident_32(u32); u64 _paravirt_ident_64(u64); +bool _paravirt_false(void); #define paravirt_nop ((void *)_paravirt_nop) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index a14df9eecfed..94105773c00c 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -66,6 +66,11 @@ u64 notrace _paravirt_ident_64(u64 x) return x; } +bool notrace _paravirt_false(void) +{ + return false; +} + void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -149,6 +154,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, ret = paravirt_patch_ident_32(insnbuf, len); else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); + else if (opfunc == _paravirt_false) + ret = paravirt_patch_false(insnbuf, len); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 553acbbb4d32..287c7b9735de 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -9,6 +9,8 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); +DEF_NATIVE(, xor, "xor %eax, %eax"); + #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); @@ -26,6 +28,12 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) return 0; } +unsigned paravirt_patch_false(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__xor, end__xor); +} + extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 11aaf1eaa0e4..8ab4379ceea9 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -17,6 +17,7 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); DEF_NATIVE(, mov32, "mov %edi, %eax"); DEF_NATIVE(, mov64, "mov %rdi, %rax"); +DEF_NATIVE(, xor, "xor %rax, %rax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); @@ -35,6 +36,12 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) start__mov64, end__mov64); } +unsigned paravirt_patch_false(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__xor, end__xor); +} + extern bool pv_is_native_spin_unlock(void); extern bool pv_is_native_vcpu_is_preempted(void); -- 2.12.3
Juergen Gross
2017-Sep-05 13:24 UTC
[PATCH 2/4] paravirt: switch vcpu_is_preempted to use _paravirt_false() on bare metal
Instead of special casing pv_lock_ops.vcpu_is_preempted when patching use _paravirt_false() on bare metal. Signed-off-by: Juergen Gross <jgross at suse.com> --- arch/x86/kernel/paravirt-spinlocks.c | 14 +------------- arch/x86/kernel/paravirt_patch_32.c | 10 ---------- arch/x86/kernel/paravirt_patch_64.c | 10 ---------- 3 files changed, 1 insertion(+), 33 deletions(-) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 8f2d1c9d43a8..26e4bd92f309 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -20,25 +20,13 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } -__visible bool __native_vcpu_is_preempted(long cpu) -{ - return false; -} -PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); - -bool pv_is_native_vcpu_is_preempted(void) -{ - return pv_lock_ops.vcpu_is_preempted.func =- __raw_callee_save___native_vcpu_is_preempted; -} - struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, - .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), + .vcpu_is_preempted = __PV_IS_CALLEE_SAVE(_paravirt_false), #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 287c7b9735de..ea311a3563e3 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -13,7 +13,6 @@ DEF_NATIVE(, xor, "xor %eax, %eax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -35,7 +34,6 @@ unsigned paravirt_patch_false(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); -extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -65,14 +63,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, goto patch_site; } goto patch_default; - - case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) { - start = start_pv_lock_ops_vcpu_is_preempted; - end = end_pv_lock_ops_vcpu_is_preempted; - goto patch_site; - } - goto patch_default; #endif default: diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 8ab4379ceea9..64dffe4499b4 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,7 +21,6 @@ DEF_NATIVE(, xor, "xor %rax, %rax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -43,7 +42,6 @@ unsigned paravirt_patch_false(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); -extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -76,14 +74,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, goto patch_site; } goto patch_default; - - case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) { - start = start_pv_lock_ops_vcpu_is_preempted; - end = end_pv_lock_ops_vcpu_is_preempted; - goto patch_site; - } - goto patch_default; #endif default: -- 2.12.3
Juergen Gross
2017-Sep-05 13:24 UTC
[PATCH 3/4] paravirt: add virt_spin_lock pvops function
There are cases where a guest tries to switch spinlocks to bare metal behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this has the downside of falling back to unfair test and set scheme for qspinlocks due to virt_spin_lock() detecting the virtualized environment. Make virt_spin_lock() a paravirt operation in order to enable users to select an explicit behavior like bare metal. Signed-off-by: Juergen Gross <jgross at suse.com> --- arch/x86/include/asm/paravirt.h | 5 ++++ arch/x86/include/asm/paravirt_types.h | 1 + arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ arch/x86/kernel/smpboot.c | 2 ++ 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c25dd22f7c70..d9e954fb37df 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); } +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) +{ + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); +} + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 19efefc0e27e..928f5e7953a7 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -319,6 +319,7 @@ struct pv_lock_ops { void (*kick)(int cpu); struct paravirt_callee_save vcpu_is_preempted; + struct paravirt_callee_save virt_spin_lock; } __no_randomize_layout; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 48a706f641f2..fbd98896385c 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) smp_store_release((u8 *)lock, 0); } +static inline bool native_virt_spin_lock(struct qspinlock *lock) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); + + return true; +} + #ifdef CONFIG_PARAVIRT_SPINLOCKS extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __pv_init_lock_hash(void); @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } + +void native_pv_lock_init(void) __init; #else static inline void queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); } + +static inline void native_pv_lock_init(void) +{ +} #endif #ifdef CONFIG_PARAVIRT #define virt_spin_lock virt_spin_lock +#ifdef CONFIG_PARAVIRT_SPINLOCKS static inline bool virt_spin_lock(struct qspinlock *lock) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return false; - - /* - * On hypervisors without PARAVIRT_SPINLOCKS support we fall - * back to a Test-and-Set spinlock, because fair locks have - * horrible lock 'holder' preemption issues. - */ - - do { - while (atomic_read(&lock->val) != 0) - cpu_relax(); - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); - - return true; + return pv_virt_spin_lock(lock); +} +#else +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + return native_virt_spin_lock(lock); } +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 26e4bd92f309..1be187ef8a38 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } +__visible bool __native_virt_spin_lock(struct qspinlock *lock) +{ + return native_virt_spin_lock(lock); +} +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock); + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, @@ -27,6 +33,14 @@ struct pv_lock_ops pv_lock_ops = { .wait = paravirt_nop, .kick = paravirt_nop, .vcpu_is_preempted = __PV_IS_CALLEE_SAVE(_paravirt_false), + .virt_spin_lock = PV_CALLEE_SAVE(__native_virt_spin_lock), #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); + +void __init native_pv_lock_init(void) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + pv_lock_ops.virt_spin_lock + __PV_IS_CALLEE_SAVE(_paravirt_false); +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 54b9e89d4d6b..21500d3ba359 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/qspinlock.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1381,6 +1382,7 @@ void __init native_smp_prepare_boot_cpu(void) /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); + native_pv_lock_init(); } void __init native_smp_cpus_done(unsigned int max_cpus) -- 2.12.3
With the boot parameter "xen_nopvspin" specified a Xen guest should not make use of paravirt spinlocks, but behave as if running on bare metal. This is not true, however, as the qspinlock code will fall back to a test-and-set scheme when it is detecting a hypervisor. In order to avoid this set the virt_spin_lock pvops function to _paravirt_false() in case xen_nopvspin has been specified. Signed-off-by: Juergen Gross <jgross at suse.com> --- arch/x86/xen/spinlock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 25a7c4302ce7..c01cedfa745d 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -129,6 +129,8 @@ void __init xen_init_spinlocks(void) if (!xen_pvspin) { printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); + pv_lock_ops.virt_spin_lock + __PV_IS_CALLEE_SAVE(_paravirt_false); return; } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); -- 2.12.3
Peter Zijlstra
2017-Sep-05 13:55 UTC
[PATCH 3/4] paravirt: add virt_spin_lock pvops function
On Tue, Sep 05, 2017 at 03:24:43PM +0200, Juergen Gross wrote:> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } >Should this not have: #ifdef CONFIG_PARAVIRT ?> +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > + > + /* > + * On hypervisors without PARAVIRT_SPINLOCKS support we fall > + * back to a Test-and-Set spinlock, because fair locks have > + * horrible lock 'holder' preemption issues. > + */ > + > + do { > + while (atomic_read(&lock->val) != 0) > + cpu_relax(); > + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > + > + return true; > +}#endif> + > #ifdef CONFIG_PARAVIRT_SPINLOCKS > extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > extern void __pv_init_lock_hash(void);> #ifdef CONFIG_PARAVIRT > #define virt_spin_lock virt_spin_lock > +#ifdef CONFIG_PARAVIRT_SPINLOCKS > static inline bool virt_spin_lock(struct qspinlock *lock) > { > + return pv_virt_spin_lock(lock); > +} > +#else > +static inline bool virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > } > +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ > #endif /* CONFIG_PARAVIRT */Because I think the above only ever uses native_virt_spin_lock() when PARAVIRT.> @@ -1381,6 +1382,7 @@ void __init native_smp_prepare_boot_cpu(void) > /* already set me in cpu_online_mask in boot_cpu_init() */ > cpumask_set_cpu(me, cpu_callout_mask); > cpu_set_state_online(me); > + native_pv_lock_init(); > }Aah, this is where that goes.. OK that works too.
On 09/05/2017 09:24 AM, Juergen Gross wrote:> There are cases where a guest tries to switch spinlocks to bare metal > behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this > has the downside of falling back to unfair test and set scheme for > qspinlocks due to virt_spin_lock() detecting the virtualized > environment. > > Make virt_spin_lock() a paravirt operation in order to enable users > to select an explicit behavior like bare metal. > > Signed-off-by: Juergen Gross <jgross at suse.com> > --- > arch/x86/include/asm/paravirt.h | 5 ++++ > arch/x86/include/asm/paravirt_types.h | 1 + > arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- > arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ > arch/x86/kernel/smpboot.c | 2 ++ > 5 files changed, 55 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index c25dd22f7c70..d9e954fb37df 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) > return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); > } > > +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) > +{ > + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); > +} > + > #endif /* SMP && PARAVIRT_SPINLOCKS */ > > #ifdef CONFIG_X86_32 > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index 19efefc0e27e..928f5e7953a7 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -319,6 +319,7 @@ struct pv_lock_ops { > void (*kick)(int cpu); > > struct paravirt_callee_save vcpu_is_preempted; > + struct paravirt_callee_save virt_spin_lock; > } __no_randomize_layout; > > /* This contains all the paravirt structures: we get a convenient > diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } > > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > +I think you can take the above if statement out as you has done test in native_pv_lock_init(). So the test will also be false here. As this patch series is x86 specific, you should probably add "x86/" in front of paravirt in the patch titles. Cheers, Longman
On 09/05/2017 09:24 AM, Juergen Gross wrote:> There are cases where a guest tries to switch spinlocks to bare metal > behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this > has the downside of falling back to unfair test and set scheme for > qspinlocks due to virt_spin_lock() detecting the virtualized > environment. > > Make virt_spin_lock() a paravirt operation in order to enable users > to select an explicit behavior like bare metal. > > Signed-off-by: Juergen Gross <jgross at suse.com> > --- > arch/x86/include/asm/paravirt.h | 5 ++++ > arch/x86/include/asm/paravirt_types.h | 1 + > arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- > arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ > arch/x86/kernel/smpboot.c | 2 ++ > 5 files changed, 55 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index c25dd22f7c70..d9e954fb37df 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) > return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); > } > > +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) > +{ > + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); > +} > + > #endif /* SMP && PARAVIRT_SPINLOCKS */ > > #ifdef CONFIG_X86_32 > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index 19efefc0e27e..928f5e7953a7 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -319,6 +319,7 @@ struct pv_lock_ops { > void (*kick)(int cpu); > > struct paravirt_callee_save vcpu_is_preempted; > + struct paravirt_callee_save virt_spin_lock; > } __no_randomize_layout; > > /* This contains all the paravirt structures: we get a convenient > diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } > > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > + > + /* > + * On hypervisors without PARAVIRT_SPINLOCKS support we fall > + * back to a Test-and-Set spinlock, because fair locks have > + * horrible lock 'holder' preemption issues. > + */ > + > + do { > + while (atomic_read(&lock->val) != 0) > + cpu_relax(); > + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > + > + return true; > +} > + > #ifdef CONFIG_PARAVIRT_SPINLOCKS > extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > extern void __pv_init_lock_hash(void); > @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) > { > return pv_vcpu_is_preempted(cpu); > } > + > +void native_pv_lock_init(void) __init; > #else > static inline void queued_spin_unlock(struct qspinlock *lock) > { > native_queued_spin_unlock(lock); > } > + > +static inline void native_pv_lock_init(void) > +{ > +} > #endif > > #ifdef CONFIG_PARAVIRT > #define virt_spin_lock virt_spin_lock > +#ifdef CONFIG_PARAVIRT_SPINLOCKS > static inline bool virt_spin_lock(struct qspinlock *lock) > { > - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > - return false;Have you consider just add one more jump label here to skip virt_spin_lock when KVM or Xen want to do so?> - > - /* > - * On hypervisors without PARAVIRT_SPINLOCKS support we fall > - * back to a Test-and-Set spinlock, because fair locks have > - * horrible lock 'holder' preemption issues. > - */ > - > - do { > - while (atomic_read(&lock->val) != 0) > - cpu_relax(); > - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > - > - return true; > + return pv_virt_spin_lock(lock); > +} > +#else > +static inline bool virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > } > +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ > #endif /* CONFIG_PARAVIRT */ > > #include <asm-generic/qspinlock.h> > diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c > index 26e4bd92f309..1be187ef8a38 100644 > --- a/arch/x86/kernel/paravirt-spinlocks.c > +++ b/arch/x86/kernel/paravirt-spinlocks.c > @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) > __raw_callee_save___native_queued_spin_unlock; > } > > +__visible bool __native_virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > +} > +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock);I have some concern about the overhead of register saving/restoring have on spin lock performance in case the kernel is under a non-KVM/Xen hypervisor. Cheers, Longman
Possibly Parallel Threads
- [PATCH 3/4] paravirt: add virt_spin_lock pvops function
- [PATCH 0/4] make virt_spin_lock() a pvops function
- [PATCH 0/4] make virt_spin_lock() a pvops function
- [PATCH 3/4] paravirt: add virt_spin_lock pvops function
- [PATCH 3/4] paravirt: add virt_spin_lock pvops function