Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
change from v2: __spin_yeild_cpu() will yield slices to lpar if target cpu is running. remove unnecessary rmb() in __spin_yield/wake_cpu. __pv_wait() will check the *ptr == val. some commit message change change fome v1: separate into 6 pathes from one patch some minor code changes. I do several tests on pseries IBM,8408-E8E with 32cpus, 64GB memory. benchmark test results are below. 2 perf tests: perf bench futex hash perf bench futex lock-pi _____test________________spinlcok______________pv-qspinlcok_____ |futex hash | 556370 ops | 629634 ops | |futex lock-pi | 362 ops | 367 ops | scheduler test: Test how many loops of schedule() can finish within 10 seconds on all cpus. _____test________________spinlcok______________pv-qspinlcok_____ |schedule() loops| 322811921 | 311449290 | kernel compiling test: build a linux kernel image to see how long it took _____test________________spinlcok______________pv-qspinlcok_____ | compiling takes| 22m | 22m | Pan Xinhui (6): qspinlock: powerpc support qspinlock powerpc: pseries/Kconfig: Add qspinlock build config powerpc: lib/locks.c: Add cpu yield/wake helper function pv-qspinlock: powerpc support pv-qspinlock pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock powerpc: pseries: Add pv-qspinlock build config/make arch/powerpc/include/asm/qspinlock.h | 39 +++++++++++++++++++ arch/powerpc/include/asm/qspinlock_paravirt.h | 38 ++++++++++++++++++ .../powerpc/include/asm/qspinlock_paravirt_types.h | 13 +++++++ arch/powerpc/include/asm/spinlock.h | 31 +++++++++------ arch/powerpc/include/asm/spinlock_types.h | 4 ++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/paravirt.c | 45 ++++++++++++++++++++++ arch/powerpc/lib/locks.c | 37 ++++++++++++++++++ arch/powerpc/platforms/pseries/Kconfig | 9 +++++ arch/powerpc/platforms/pseries/setup.c | 5 +++ kernel/locking/qspinlock_paravirt.h | 2 +- 11 files changed, 211 insertions(+), 13 deletions(-) create mode 100644 arch/powerpc/include/asm/qspinlock.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h create mode 100644 arch/powerpc/kernel/paravirt.c -- 1.9.1
Base code to enable qspinlock on powerpc. this patch add some #ifdef here and there. Although there is no paravirt related code, we can successfully build a qspinlock kernel after apply this patch. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/include/asm/qspinlock.h | 22 ++++++++++++++++++++++ arch/powerpc/include/asm/spinlock.h | 27 +++++++++++++++------------ arch/powerpc/include/asm/spinlock_types.h | 4 ++++ arch/powerpc/lib/locks.c | 4 ++++ 4 files changed, 45 insertions(+), 12 deletions(-) create mode 100644 arch/powerpc/include/asm/qspinlock.h diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 0000000..5883954 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,22 @@ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include <asm-generic/qspinlock_types.h> + +#define SPIN_THRESHOLD (1 << 15) +#define queued_spin_unlock queued_spin_unlock + +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + /* no load/store can be across the unlock()*/ + smp_store_release((u8 *)lock, 0); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} + +#include <asm-generic/qspinlock.h> + +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 523673d..4359ee6 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -52,6 +52,20 @@ #define SYNC_IO #endif +#if defined(CONFIG_PPC_SPLPAR) +/* We only yield to the hypervisor if we are in shared processor mode */ +#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) +extern void __spin_yield(arch_spinlock_t *lock); +extern void __rw_yield(arch_rwlock_t *lock); +#else /* SPLPAR */ +#define __spin_yield(x) barrier() +#define __rw_yield(x) barrier() +#define SHARED_PROCESSOR 0 +#endif + +#ifdef CONFIG_QUEUED_SPINLOCKS +#include <asm/qspinlock.h> +#else static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.slock == 0; @@ -106,18 +120,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * held. Conveniently, we have a word in the paca that holds this * value. */ - -#if defined(CONFIG_PPC_SPLPAR) -/* We only yield to the hypervisor if we are in shared processor mode */ -#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) -extern void __spin_yield(arch_spinlock_t *lock); -extern void __rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR */ -#define __spin_yield(x) barrier() -#define __rw_yield(x) barrier() -#define SHARED_PROCESSOR 0 -#endif - static inline void arch_spin_lock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; @@ -169,6 +171,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock); do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) #endif +#endif /* !CONFIG_QUEUED_SPINLOCKS */ /* * Read-write spinlocks, allowing multiple readers * but only one writer. diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 2351adc..bd7144e 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -5,11 +5,15 @@ # error "please don't include this file directly" #endif +#ifdef CONFIG_QUEUED_SPINLOCKS +#include <asm-generic/qspinlock_types.h> +#else typedef struct { volatile unsigned int slock; } arch_spinlock_t; #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#endif typedef struct { volatile signed int lock; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index f7deebd..a9ebd71 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -23,6 +23,7 @@ #include <asm/hvcall.h> #include <asm/smp.h> +#ifndef CONFIG_QUEUED_SPINLOCKS void __spin_yield(arch_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; @@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock) get_hard_smp_processor_id(holder_cpu), yield_count); } EXPORT_SYMBOL_GPL(__spin_yield); +#endif /* * Waiting for a read lock or a write lock on a rwlock... @@ -69,6 +71,7 @@ void __rw_yield(arch_rwlock_t *rw) } #endif +#ifndef CONFIG_QUEUED_SPINLOCKS void arch_spin_unlock_wait(arch_spinlock_t *lock) { smp_mb(); @@ -84,3 +87,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) } EXPORT_SYMBOL(arch_spin_unlock_wait); +#endif -- 1.9.1
Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 2/6] powerpc: pseries/Kconfig: Add qspinlock build config
pseries will use qspinlock by default. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/platforms/pseries/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index bec90fb..f669323 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HOTPLUG_CPU if SMP select ARCH_RANDOM select PPC_DOORBELL + select ARCH_USE_QUEUED_SPINLOCKS default y config PPC_SPLPAR -- 1.9.1
Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function
pv-qspinlock core has pv_wait/pv_kick which will give a better performace by yielding and kicking cpu at some cases. lets support them by adding two corresponding helper functions. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/include/asm/spinlock.h | 4 ++++ arch/powerpc/lib/locks.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 4359ee6..3b65372 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -56,9 +56,13 @@ /* We only yield to the hypervisor if we are in shared processor mode */ #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr)) extern void __spin_yield(arch_spinlock_t *lock); +extern void __spin_yield_cpu(int cpu); +extern void __spin_wake_cpu(int cpu); extern void __rw_yield(arch_rwlock_t *lock); #else /* SPLPAR */ #define __spin_yield(x) barrier() +#define __spin_yield_cpu(x) barrier() +#define __spin_wake_cpu(x) barrier() #define __rw_yield(x) barrier() #define SHARED_PROCESSOR 0 #endif diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index a9ebd71..3a58834 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -23,6 +23,39 @@ #include <asm/hvcall.h> #include <asm/smp.h> +void __spin_yield_cpu(int cpu) +{ + unsigned int holder_cpu = cpu, yield_count; + + if (cpu == -1) { + plpar_hcall_norets(H_CONFER, -1, 0); + return; + } + BUG_ON(holder_cpu >= nr_cpu_ids); + yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + if ((yield_count & 1) == 0) { + /* if target cpu is running, confer slices to lpar*/ + plpar_hcall_norets(H_CONFER, -1, 0); + return; + } + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); +} +EXPORT_SYMBOL_GPL(__spin_yield_cpu); + +void __spin_wake_cpu(int cpu) +{ + unsigned int holder_cpu = cpu, yield_count; + + BUG_ON(holder_cpu >= nr_cpu_ids); + yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + if ((yield_count & 1) == 0) + return; /* virtual cpu is currently running */ + plpar_hcall_norets(H_PROD, + get_hard_smp_processor_id(holder_cpu)); +} +EXPORT_SYMBOL_GPL(__spin_wake_cpu); + #ifndef CONFIG_QUEUED_SPINLOCKS void __spin_yield(arch_spinlock_t *lock) { -- 1.9.1
Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 4/6] pv-qspinlock: powerpc support pv-qspinlock
As we need let pv-qspinlock-kernel run on all environment which might have no powervm, we should runtime choose which qspinlock version to use. The default pv-qspinlock use native version. pv_lock initialization should be done in bootstage with irq disabled. And if there is PHYP, restore pv_lock_ops callbacks to pv version. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/include/asm/qspinlock.h | 17 ++++++++ arch/powerpc/include/asm/qspinlock_paravirt.h | 38 ++++++++++++++++++ .../powerpc/include/asm/qspinlock_paravirt_types.h | 13 +++++++ arch/powerpc/kernel/paravirt.c | 45 ++++++++++++++++++++++ arch/powerpc/platforms/pseries/setup.c | 5 +++ 5 files changed, 118 insertions(+) create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h create mode 100644 arch/powerpc/kernel/paravirt.c diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index 5883954..4728f6e 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -12,10 +12,27 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) smp_store_release((u8 *)lock, 0); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS + +#define __ARCH_NEED_PV_HASH_LOOKUP + +#include <asm/qspinlock_paravirt.h> + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} +#else static inline void queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); } +#endif #include <asm-generic/qspinlock.h> diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h new file mode 100644 index 0000000..cd17a79 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt.h @@ -0,0 +1,38 @@ +#ifndef CONFIG_PARAVIRT_SPINLOCKS +#error "do not include this file" +#endif + +#ifndef _ASM_QSPINLOCK_PARAVIRT_H +#define _ASM_QSPINLOCK_PARAVIRT_H + +#include <asm/qspinlock_paravirt_types.h> + +extern void pv_lock_init(void); +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val) +{ + CLEAR_IO_SYNC; + pv_lock_op.lock(lock, val); +} + +static inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + SYNC_IO; + pv_lock_op.unlock(lock); +} + +static inline void pv_wait(u8 *ptr, u8 val, int lockcpu) +{ + pv_lock_op.wait(ptr, val, lockcpu); +} + +static inline void pv_kick(int cpu) +{ + pv_lock_op.kick(cpu); +} + +#endif diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h b/arch/powerpc/include/asm/qspinlock_paravirt_types.h new file mode 100644 index 0000000..e1fdeb0 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h @@ -0,0 +1,13 @@ +#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H +#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H + +struct pv_lock_ops { + void (*lock)(struct qspinlock *lock, u32 val); + void (*unlock)(struct qspinlock *lock); + void (*wait)(u8 *ptr, u8 val, int cpu); + void (*kick)(int cpu); +}; + +extern struct pv_lock_ops pv_lock_op; + +#endif diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c new file mode 100644 index 0000000..4f19f7e --- /dev/null +++ b/arch/powerpc/kernel/paravirt.c @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/spinlock.h> + +static void __native_queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} + +static void __pv_wait(u8 *ptr, u8 val, int cpu) +{ + HMT_low(); + if (READ_ONCE(*ptr) == val) + __spin_yield_cpu(cpu); + HMT_medium(); +} + +static void __pv_kick(int cpu) +{ + __spin_wake_cpu(cpu); +} + +struct pv_lock_ops pv_lock_op = { + .lock = native_queued_spin_lock_slowpath, + .unlock = __native_queued_spin_unlock, + .wait = NULL, + .kick = NULL, +}; +EXPORT_SYMBOL(pv_lock_op); + +void __init pv_lock_init(void) +{ + if (SHARED_PROCESSOR) { + __pv_init_lock_hash(); + pv_lock_op.lock = __pv_queued_spin_lock_slowpath; + pv_lock_op.unlock = __pv_queued_spin_unlock; + pv_lock_op.wait = __pv_wait; + pv_lock_op.kick = __pv_kick; + } +} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6e944fc..c9f056e 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -547,6 +547,11 @@ static void __init pSeries_setup_arch(void) "%ld\n", rc); } } + +#ifdef CONFIG_PARAVIRT_SPINLOCKS + pv_lock_init(); +#endif + } static int __init pSeries_init_panel(void) -- 1.9.1
Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
cmpxchg_release is light-wight than cmpxchg, we can gain a better performace then. On some arch like ppc, barrier impact the performace too much. Suggested-by: Boqun Feng <boqun.feng at gmail.com> Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index a5b1248..2bbffe4 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); if (likely(locked == _Q_LOCKED_VAL)) return; -- 1.9.1
Pan Xinhui
2016-May-25 08:18 UTC
[PATCH v3 6/6] powerpc: pseries: Add pv-qspinlock build config/make
pseries has PowerVM support, the default option is Y. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/platforms/pseries/Kconfig | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2da380f..ae7c2f1 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt.o rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index f669323..46632e4 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -128,3 +128,11 @@ config HV_PERF_CTRS systems. 24x7 is available on Power 8 systems. If unsure, select Y. + +config PARAVIRT_SPINLOCKS + bool "Paravirtialization support for qspinlock" + depends on PPC_SPLPAR && QUEUED_SPINLOCKS + default y + help + If platform supports virtualization, for example PowerVM, this option + can let guest have a better performace. -- 1.9.1
Peter Zijlstra
2016-May-26 16:47 UTC
[PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
On Wed, May 25, 2016 at 04:18:08PM +0800, Pan Xinhui wrote:> cmpxchg_release is light-wight than cmpxchg, we can gain a better > performace then. On some arch like ppc, barrier impact the performace > too much. > > Suggested-by: Boqun Feng <boqun.feng at gmail.com> > Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> > --- > kernel/locking/qspinlock_paravirt.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h > index a5b1248..2bbffe4 100644 > --- a/kernel/locking/qspinlock_paravirt.h > +++ b/kernel/locking/qspinlock_paravirt.h > @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) > * unhash. Otherwise it would be possible to have multiple @lock > * entries, which would be BAD. > */ > - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); > + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); > if (likely(locked == _Q_LOCKED_VAL)) > return;This patch fails to explain _why_ it can be relaxed. And seeing how this cmpxchg() can actually unlock the lock, I don't see how this can possibly be correct. Maybe cmpxchg_release(), but relaxed seems very wrong.
Peter Zijlstra
2016-May-26 16:50 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:> _____test________________spinlcok______________pv-qspinlcok_____ > |futex hash | 556370 ops | 629634 ops | > |futex lock-pi | 362 ops | 367 ops | > > scheduler test: > Test how many loops of schedule() can finish within 10 seconds on all cpus. > > _____test________________spinlcok______________pv-qspinlcok_____ > |schedule() loops| 322811921 | 311449290 | > > kernel compiling test: > build a linux kernel image to see how long it took > > _____test________________spinlcok______________pv-qspinlcok_____ > | compiling takes| 22m | 22m |s/spinlcok/spinlock/ Is 'spinlcok' the current test-and-set lock? And what about regular qspinlock, in case of !SHARED_PROCESSOR?
Peter Zijlstra
2016-May-26 16:54 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:> I do several tests on pseries IBM,8408-E8E with 32cpus, 64GB memory. > benchmark test results are below.> _____test________________spinlcok______________pv-qspinlcok_____ > | compiling takes| 22m | 22m |How can a kernel build take 22 minutes with 32 CPUs? That's far too long. My 40 CPU IVB-EP takes all of 50 seconds to build an x86_64-defconfig from scratch.
xinhui
2016-May-27 10:52 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On 2016?05?27? 00:50, Peter Zijlstra wrote:> On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote: > >> _____test________________spinlcok______________pv-qspinlcok_____ >> |futex hash | 556370 ops | 629634 ops | >> |futex lock-pi | 362 ops | 367 ops | >> >> scheduler test: >> Test how many loops of schedule() can finish within 10 seconds on all cpus. >> >> _____test________________spinlcok______________pv-qspinlcok_____ >> |schedule() loops| 322811921 | 311449290 | >> >> kernel compiling test: >> build a linux kernel image to see how long it took >> >> _____test________________spinlcok______________pv-qspinlcok_____ >> | compiling takes| 22m | 22m | > > > s/spinlcok/spinlock/ >Oh, foolish mistake...sorry> Is 'spinlcok' the current test-and-set lock? >Yes. I will describe it in a clear way in the next patchset.> And what about regular qspinlock, in case of !SHARED_PROCESSOR? >You mean the test results on powerNV? yes, I make a kernel build with !SHARED_PROCESSOR. and do perf tests and scheduler tests on same machine(32 cpus). performance is better than current spinlock _____test________________spinlock________________qspinlock_____ |futex hash | 533060 ops | 541513 ops | |futex lock-pi | 357 ops | 356 ops | _____test________________spinlock________________qspinlock_____ |schedule() loops| 337691713 | 361935207 | NOTE: I have updated the scheduler test tools, and the new performance test results show that both pv-spinlock and qspinlock is better than current spinlock. I will also update the test result in my next patchset. thanks xinhui
Apparently Analagous Threads
- [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
- [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention