Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
change from v2: __spin_yeild_cpu() will yield slices to lpar if target cpu is running. remove unnecessary rmb() in __spin_yield/wake_cpu. __pv_wait() will check the *ptr == val. some commit message change change fome v1: separate into 6 pathes from one patch some minor code changes. I do several tests on pseries IBM,8408-E8E with 32cpus, 64GB memory. benchmark test results are below. 2 perf tests: perf bench futex hash perf bench futex lock-pi _____test________________spinlcok______________pv-qspinlcok_____ |futex hash | 556370 ops | 629634 ops | |futex lock-pi | 362 ops | 367 ops | scheduler test: Test how many loops of schedule() can finish within 10 seconds on all cpus. _____test________________spinlcok______________pv-qspinlcok_____ |schedule() loops| 322811921 | 311449290 | kernel compiling test: build a linux kernel image to see how long it took _____test________________spinlcok______________pv-qspinlcok_____ | compiling takes| 22m | 22m | Pan Xinhui (6): qspinlock: powerpc support qspinlock powerpc: pseries/Kconfig: Add qspinlock build config powerpc: lib/locks.c: Add cpu yield/wake helper function pv-qspinlock: powerpc support pv-qspinlock pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock powerpc: pseries: Add pv-qspinlock build config/make arch/powerpc/include/asm/qspinlock.h | 39 +++++++++++++++++++ arch/powerpc/include/asm/qspinlock_paravirt.h | 38 ++++++++++++++++++ .../powerpc/include/asm/qspinlock_paravirt_types.h | 13 +++++++ arch/powerpc/include/asm/spinlock.h | 31 +++++++++------ arch/powerpc/include/asm/spinlock_types.h | 4 ++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/paravirt.c | 45 ++++++++++++++++++++++ arch/powerpc/lib/locks.c | 37 ++++++++++++++++++ arch/powerpc/platforms/pseries/Kconfig | 9 +++++ arch/powerpc/platforms/pseries/setup.c | 5 +++ kernel/locking/qspinlock_paravirt.h | 2 +- 11 files changed, 211 insertions(+), 13 deletions(-) create mode 100644 arch/powerpc/include/asm/qspinlock.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h create mode 100644 arch/powerpc/kernel/paravirt.c -- 1.9.1
Base code to enable qspinlock on powerpc. this patch add some #ifdef
here and there. Although there is no paravirt related code, we can
successfully build a qspinlock kernel after apply this patch.
Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/qspinlock.h      | 22 ++++++++++++++++++++++
 arch/powerpc/include/asm/spinlock.h       | 27 +++++++++++++++------------
 arch/powerpc/include/asm/spinlock_types.h |  4 ++++
 arch/powerpc/lib/locks.c                  |  4 ++++
 4 files changed, 45 insertions(+), 12 deletions(-)
 create mode 100644 arch/powerpc/include/asm/qspinlock.h
diff --git a/arch/powerpc/include/asm/qspinlock.h
b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 0000000..5883954
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,22 @@
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define SPIN_THRESHOLD (1 << 15)
+#define queued_spin_unlock queued_spin_unlock
+
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+	/* no load/store can be across the unlock()*/
+	smp_store_release((u8 *)lock, 0);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	native_queued_spin_unlock(lock);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h
b/arch/powerpc/include/asm/spinlock.h
index 523673d..4359ee6 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,20 @@
 #define SYNC_IO
 #endif
 
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+#define __spin_yield(x)	barrier()
+#define __rw_yield(x)	barrier()
+#define SHARED_PROCESSOR	0
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
 	return lock.slock == 0;
@@ -106,18 +120,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  * held.  Conveniently, we have a word in the paca that holds this
  * value.
  */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x)	barrier()
-#define __rw_yield(x)	barrier()
-#define SHARED_PROCESSOR	0
-#endif
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
@@ -169,6 +171,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 #endif
 
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
diff --git a/arch/powerpc/include/asm/spinlock_types.h
b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc..bd7144e 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,15 @@
 # error "please don't include this file directly"
 #endif
 
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
 typedef struct {
 	volatile unsigned int slock;
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#endif
 
 typedef struct {
 	volatile signed int lock;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index f7deebd..a9ebd71 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
 #include <asm/hvcall.h>
 #include <asm/smp.h>
 
+#ifndef CONFIG_QUEUED_SPINLOCKS
 void __spin_yield(arch_spinlock_t *lock)
 {
 	unsigned int lock_value, holder_cpu, yield_count;
@@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock)
 		get_hard_smp_processor_id(holder_cpu), yield_count);
 }
 EXPORT_SYMBOL_GPL(__spin_yield);
+#endif
 
 /*
  * Waiting for a read lock or a write lock on a rwlock...
@@ -69,6 +71,7 @@ void __rw_yield(arch_rwlock_t *rw)
 }
 #endif
 
+#ifndef CONFIG_QUEUED_SPINLOCKS
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	smp_mb();
@@ -84,3 +87,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
-- 
1.9.1
Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 2/6] powerpc: pseries/Kconfig: Add qspinlock build config
pseries will use qspinlock by default. Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- arch/powerpc/platforms/pseries/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index bec90fb..f669323 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HOTPLUG_CPU if SMP select ARCH_RANDOM select PPC_DOORBELL + select ARCH_USE_QUEUED_SPINLOCKS default y config PPC_SPLPAR -- 1.9.1
Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 3/6] powerpc: lib/locks.c: Add cpu yield/wake helper function
pv-qspinlock core has pv_wait/pv_kick which will give a better
performace by yielding and kicking cpu at some cases.
lets support them by adding two corresponding helper functions.
Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/spinlock.h |  4 ++++
 arch/powerpc/lib/locks.c            | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
diff --git a/arch/powerpc/include/asm/spinlock.h
b/arch/powerpc/include/asm/spinlock.h
index 4359ee6..3b65372 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,9 +56,13 @@
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
 extern void __spin_yield(arch_spinlock_t *lock);
+extern void __spin_yield_cpu(int cpu);
+extern void __spin_wake_cpu(int cpu);
 extern void __rw_yield(arch_rwlock_t *lock);
 #else /* SPLPAR */
 #define __spin_yield(x)	barrier()
+#define __spin_yield_cpu(x) barrier()
+#define __spin_wake_cpu(x) barrier()
 #define __rw_yield(x)	barrier()
 #define SHARED_PROCESSOR	0
 #endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index a9ebd71..3a58834 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,39 @@
 #include <asm/hvcall.h>
 #include <asm/smp.h>
 
+void __spin_yield_cpu(int cpu)
+{
+	unsigned int holder_cpu = cpu, yield_count;
+
+	if (cpu == -1) {
+		plpar_hcall_norets(H_CONFER, -1, 0);
+		return;
+	}
+	BUG_ON(holder_cpu >= nr_cpu_ids);
+	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+	if ((yield_count & 1) == 0) {
+		/* if target cpu is running, confer slices to lpar*/
+		plpar_hcall_norets(H_CONFER, -1, 0);
+		return;
+	}
+	plpar_hcall_norets(H_CONFER,
+		get_hard_smp_processor_id(holder_cpu), yield_count);
+}
+EXPORT_SYMBOL_GPL(__spin_yield_cpu);
+
+void __spin_wake_cpu(int cpu)
+{
+	unsigned int holder_cpu = cpu, yield_count;
+
+	BUG_ON(holder_cpu >= nr_cpu_ids);
+	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	plpar_hcall_norets(H_PROD,
+		get_hard_smp_processor_id(holder_cpu));
+}
+EXPORT_SYMBOL_GPL(__spin_wake_cpu);
+
 #ifndef CONFIG_QUEUED_SPINLOCKS
 void __spin_yield(arch_spinlock_t *lock)
 {
-- 
1.9.1
Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 4/6] pv-qspinlock: powerpc support pv-qspinlock
As we need let pv-qspinlock-kernel run on all environment which might
have no powervm, we should runtime choose which qspinlock version to
use.
The default pv-qspinlock use native version. pv_lock initialization
should be done in bootstage with irq disabled. And if there is PHYP,
restore pv_lock_ops callbacks to pv version.
Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/qspinlock.h               | 17 ++++++++
 arch/powerpc/include/asm/qspinlock_paravirt.h      | 38 ++++++++++++++++++
 .../powerpc/include/asm/qspinlock_paravirt_types.h | 13 +++++++
 arch/powerpc/kernel/paravirt.c                     | 45 ++++++++++++++++++++++
 arch/powerpc/platforms/pseries/setup.c             |  5 +++
 5 files changed, 118 insertions(+)
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/powerpc/include/asm/qspinlock_paravirt_types.h
 create mode 100644 arch/powerpc/kernel/paravirt.c
diff --git a/arch/powerpc/include/asm/qspinlock.h
b/arch/powerpc/include/asm/qspinlock.h
index 5883954..4728f6e 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -12,10 +12,27 @@ static inline void native_queued_spin_unlock(struct
qspinlock *lock)
 	smp_store_release((u8 *)lock, 0);
 }
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+#define __ARCH_NEED_PV_HASH_LOOKUP
+
+#include <asm/qspinlock_paravirt.h>
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	pv_queued_spin_lock(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	pv_queued_spin_unlock(lock);
+}
+#else
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
 	native_queued_spin_unlock(lock);
 }
+#endif
 
 #include <asm-generic/qspinlock.h>
 
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h
b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 0000000..cd17a79
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,38 @@
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#error "do not include this file"
+#endif
+
+#ifndef _ASM_QSPINLOCK_PARAVIRT_H
+#define _ASM_QSPINLOCK_PARAVIRT_H
+
+#include  <asm/qspinlock_paravirt_types.h>
+
+extern void pv_lock_init(void);
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void pv_queued_spin_lock(struct qspinlock *lock, u32 val)
+{
+	CLEAR_IO_SYNC;
+	pv_lock_op.lock(lock, val);
+}
+
+static inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	SYNC_IO;
+	pv_lock_op.unlock(lock);
+}
+
+static inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
+{
+	pv_lock_op.wait(ptr, val, lockcpu);
+}
+
+static inline void pv_kick(int cpu)
+{
+	pv_lock_op.kick(cpu);
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt_types.h
b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
new file mode 100644
index 0000000..e1fdeb0
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt_types.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+#define _ASM_QSPINLOCK_PARAVIRT_TYPES_H
+
+struct pv_lock_ops {
+	void (*lock)(struct qspinlock *lock, u32 val);
+	void (*unlock)(struct qspinlock *lock);
+	void (*wait)(u8 *ptr, u8 val, int cpu);
+	void (*kick)(int cpu);
+};
+
+extern struct pv_lock_ops pv_lock_op;
+
+#endif
diff --git a/arch/powerpc/kernel/paravirt.c b/arch/powerpc/kernel/paravirt.c
new file mode 100644
index 0000000..4f19f7e
--- /dev/null
+++ b/arch/powerpc/kernel/paravirt.c
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+
+static void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+	native_queued_spin_unlock(lock);
+}
+
+static void __pv_wait(u8 *ptr, u8 val, int cpu)
+{
+	HMT_low();
+	if (READ_ONCE(*ptr) == val)
+		__spin_yield_cpu(cpu);
+	HMT_medium();
+}
+
+static void __pv_kick(int cpu)
+{
+	__spin_wake_cpu(cpu);
+}
+
+struct pv_lock_ops pv_lock_op = {
+	.lock = native_queued_spin_lock_slowpath,
+	.unlock = __native_queued_spin_unlock,
+	.wait = NULL,
+	.kick = NULL,
+};
+EXPORT_SYMBOL(pv_lock_op);
+
+void __init pv_lock_init(void)
+{
+	if (SHARED_PROCESSOR) {
+		__pv_init_lock_hash();
+		pv_lock_op.lock = __pv_queued_spin_lock_slowpath;
+		pv_lock_op.unlock = __pv_queued_spin_unlock;
+		pv_lock_op.wait = __pv_wait;
+		pv_lock_op.kick = __pv_kick;
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c
b/arch/powerpc/platforms/pseries/setup.c
index 6e944fc..c9f056e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -547,6 +547,11 @@ static void __init pSeries_setup_arch(void)
 				"%ld\n", rc);
 		}
 	}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	pv_lock_init();
+#endif
+
 }
 
 static int __init pSeries_init_panel(void)
-- 
1.9.1
Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
cmpxchg_release is light-wight than cmpxchg, we can gain a better performace then. On some arch like ppc, barrier impact the performace too much. Suggested-by: Boqun Feng <boqun.feng at gmail.com> Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index a5b1248..2bbffe4 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); if (likely(locked == _Q_LOCKED_VAL)) return; -- 1.9.1
Pan Xinhui
2016-May-25  08:18 UTC
[PATCH v3 6/6] powerpc: pseries: Add pv-qspinlock build config/make
pseries has PowerVM support, the default option is Y.
Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
---
 arch/powerpc/kernel/Makefile           | 1 +
 arch/powerpc/platforms/pseries/Kconfig | 8 ++++++++
 2 files changed, 9 insertions(+)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..ae7c2f1 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
 obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
 procfs-y			:= proc_powerpc.o
 obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)	+= paravirt.o
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o
 obj-$(CONFIG_PPC_RTAS)		+= rtas.o rtas-rtc.o $(rtaspci-y-y)
 obj-$(CONFIG_PPC_RTAS_DAEMON)	+= rtasd.o
diff --git a/arch/powerpc/platforms/pseries/Kconfig
b/arch/powerpc/platforms/pseries/Kconfig
index f669323..46632e4 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -128,3 +128,11 @@ config HV_PERF_CTRS
 	  systems. 24x7 is available on Power 8 systems.
 
           If unsure, select Y.
+
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtialization support for qspinlock"
+	depends on PPC_SPLPAR && QUEUED_SPINLOCKS
+	default y
+	help
+	  If platform supports virtualization, for example PowerVM, this option
+	  can let guest have a better performace.
-- 
1.9.1
Peter Zijlstra
2016-May-26  16:47 UTC
[PATCH v3 5/6] pv-qspinlock: use cmpxchg_release in __pv_queued_spin_unlock
On Wed, May 25, 2016 at 04:18:08PM +0800, Pan Xinhui wrote:> cmpxchg_release is light-wight than cmpxchg, we can gain a better > performace then. On some arch like ppc, barrier impact the performace > too much. > > Suggested-by: Boqun Feng <boqun.feng at gmail.com> > Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com> > --- > kernel/locking/qspinlock_paravirt.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h > index a5b1248..2bbffe4 100644 > --- a/kernel/locking/qspinlock_paravirt.h > +++ b/kernel/locking/qspinlock_paravirt.h > @@ -614,7 +614,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) > * unhash. Otherwise it would be possible to have multiple @lock > * entries, which would be BAD. > */ > - locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); > + locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); > if (likely(locked == _Q_LOCKED_VAL)) > return;This patch fails to explain _why_ it can be relaxed. And seeing how this cmpxchg() can actually unlock the lock, I don't see how this can possibly be correct. Maybe cmpxchg_release(), but relaxed seems very wrong.
Peter Zijlstra
2016-May-26  16:50 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:> _____test________________spinlcok______________pv-qspinlcok_____ > |futex hash | 556370 ops | 629634 ops | > |futex lock-pi | 362 ops | 367 ops | > > scheduler test: > Test how many loops of schedule() can finish within 10 seconds on all cpus. > > _____test________________spinlcok______________pv-qspinlcok_____ > |schedule() loops| 322811921 | 311449290 | > > kernel compiling test: > build a linux kernel image to see how long it took > > _____test________________spinlcok______________pv-qspinlcok_____ > | compiling takes| 22m | 22m |s/spinlcok/spinlock/ Is 'spinlcok' the current test-and-set lock? And what about regular qspinlock, in case of !SHARED_PROCESSOR?
Peter Zijlstra
2016-May-26  16:54 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote:> I do several tests on pseries IBM,8408-E8E with 32cpus, 64GB memory. > benchmark test results are below.> _____test________________spinlcok______________pv-qspinlcok_____ > | compiling takes| 22m | 22m |How can a kernel build take 22 minutes with 32 CPUs? That's far too long. My 40 CPU IVB-EP takes all of 50 seconds to build an x86_64-defconfig from scratch.
xinhui
2016-May-27  10:52 UTC
[PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
On 2016?05?27? 00:50, Peter Zijlstra wrote:> On Wed, May 25, 2016 at 04:18:03PM +0800, Pan Xinhui wrote: > >> _____test________________spinlcok______________pv-qspinlcok_____ >> |futex hash | 556370 ops | 629634 ops | >> |futex lock-pi | 362 ops | 367 ops | >> >> scheduler test: >> Test how many loops of schedule() can finish within 10 seconds on all cpus. >> >> _____test________________spinlcok______________pv-qspinlcok_____ >> |schedule() loops| 322811921 | 311449290 | >> >> kernel compiling test: >> build a linux kernel image to see how long it took >> >> _____test________________spinlcok______________pv-qspinlcok_____ >> | compiling takes| 22m | 22m | > > > s/spinlcok/spinlock/ >Oh, foolish mistake...sorry> Is 'spinlcok' the current test-and-set lock? >Yes. I will describe it in a clear way in the next patchset.> And what about regular qspinlock, in case of !SHARED_PROCESSOR? >You mean the test results on powerNV? yes, I make a kernel build with !SHARED_PROCESSOR. and do perf tests and scheduler tests on same machine(32 cpus). performance is better than current spinlock _____test________________spinlock________________qspinlock_____ |futex hash | 533060 ops | 541513 ops | |futex lock-pi | 357 ops | 356 ops | _____test________________spinlock________________qspinlock_____ |schedule() loops| 337691713 | 361935207 | NOTE: I have updated the scheduler test tools, and the new performance test results show that both pv-spinlock and qspinlock is better than current spinlock. I will also update the test result in my next patchset. thanks xinhui
Seemingly Similar Threads
- [PATCH v3 0/6] powerpc use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention
- [PATCH v5 0/6] powerPC/pSeries use pv-qpsinlock as the default spinlock implemention