thr3ads.net - search: "_qspinlock_locked"

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 26

2

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

...truct qspinlock *lock, int qsval) > +{ > + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; > + u16 old; > + > + /* > + * Fall into the quick spinning code path only if no one is waiting > + * or the lock is available. > + */ > + if (unlikely((qsval != _QSPINLOCK_LOCKED) && > + (qsval != _QSPINLOCK_WAITING))) > + return 0; > + > + old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); > + > + if (old == 0) { > + /* > + * Got the lock, can clear the waiting bit now > + */ > + smp_u8_store_relea...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 26

2

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

...truct qspinlock *lock, int qsval) > +{ > + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; > + u16 old; > + > + /* > + * Fall into the quick spinning code path only if no one is waiting > + * or the lock is available. > + */ > + if (unlikely((qsval != _QSPINLOCK_LOCKED) && > + (qsval != _QSPINLOCK_WAITING))) > + return 0; > + > + old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); > + > + if (old == 0) { > + /* > + * Got the lock, can clear the waiting bit now > + */ > + smp_u8_store_relea...

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

2014 Mar 02

1

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

...xchg(&lock->qlcode, my_qcode); > + /* > + * It is possible that we may accidentally steal the lock. If this is > + * the case, we need to either release it if not the head of the queue > + * or get the lock and be done with it. > + */ > + if (unlikely(!(prev_qcode & _QSPINLOCK_LOCKED))) { > + if (prev_qcode == 0) { > + /* > + * Got the lock since it is at the head of the queue > + * Now try to atomically clear the queue code. > + */ > + if (atomic_cmpxchg(&lock->qlcode, my_qcode, > + _QSPINLOCK_LOCKED) == my_qcode) > + goto...

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

2014 Mar 02

1

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

...xchg(&lock->qlcode, my_qcode); > + /* > + * It is possible that we may accidentally steal the lock. If this is > + * the case, we need to either release it if not the head of the queue > + * or get the lock and be done with it. > + */ > + if (unlikely(!(prev_qcode & _QSPINLOCK_LOCKED))) { > + if (prev_qcode == 0) { > + /* > + * Got the lock since it is at the head of the queue > + * Now try to atomically clear the queue code. > + */ > + if (atomic_cmpxchg(&lock->qlcode, my_qcode, > + _QSPINLOCK_LOCKED) == my_qcode) > + goto...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 26

0

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

.../qspinlock_types.h @@ -48,7 +48,13 @@ typedef struct qspinlock { atomic_t qlcode; /* Lock + queue code */ } arch_spinlock_t; -#define _QCODE_OFFSET 8 +#if CONFIG_NR_CPUS >= (1 << 14) +# define _Q_MANY_CPUS +# define _QCODE_OFFSET 8 +#else +# define _QCODE_OFFSET 16 +#endif + #define _QSPINLOCK_LOCKED 1U #define _QSPINLOCK_LOCK_MASK 0xff diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ed5efa7..22a63fa 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -109,8 +109,11 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 27

0

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

.../qspinlock_types.h @@ -48,7 +48,13 @@ typedef struct qspinlock { atomic_t qlcode; /* Lock + queue code */ } arch_spinlock_t; -#define _QCODE_OFFSET 8 +#if CONFIG_NR_CPUS >= (1 << 14) +# define _Q_MANY_CPUS +# define _QCODE_OFFSET 8 +#else +# define _QCODE_OFFSET 16 +#endif + #define _QSPINLOCK_LOCKED 1U #define _QSPINLOCK_LOCK_MASK 0xff diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ed5efa7..22a63fa 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -109,8 +109,11 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };...

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

2014 Mar 12

0

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

...d to go through the + * MCS style locking queuing which has a higher overhead. */ +#define _QSPINLOCK_WAIT_SHIFT 8 /* Waiting bit position */ +#define _QSPINLOCK_WAITING (1 << _QSPINLOCK_WAIT_SHIFT) +/* Masks for lock & wait bits */ +#define _QSPINLOCK_LWMASK (_QSPINLOCK_WAITING | _QSPINLOCK_LOCKED) + #define queue_encode_qcode(cpu, idx) (((cpu) + 1) << 2 | (idx)) +#define queue_get_qcode(lock) (atomic_read(&(lock)->qlcode) >> _QCODE_OFFSET) + +#define queue_spin_trylock_quick queue_spin_trylock_quick +/** + * queue_spin_trylock_quick - quick spinning on the queue spinlock...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 27

0

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

...>> +{ >> + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; >> + u16 old; >> + >> + /* >> + * Fall into the quick spinning code path only if no one is waiting >> + * or the lock is available. >> + */ >> + if (unlikely((qsval != _QSPINLOCK_LOCKED)&& >> + (qsval != _QSPINLOCK_WAITING))) >> + return 0; >> + >> + old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); >> + >> + if (old == 0) { >> + /* >> + * Got the lock, can clear the waiting bit now >>...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Mar 02

1

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

On 02/26, Waiman Long wrote: > > @@ -144,7 +317,7 @@ static __always_inline int queue_spin_setlock(struct qspinlock *lock) > int qlcode = atomic_read(lock->qlcode); > > if (!(qlcode & _QSPINLOCK_LOCKED) && (atomic_cmpxchg(&lock->qlcode, > - qlcode, qlcode|_QSPINLOCK_LOCKED) == qlcode)) > + qlcode, code|_QSPINLOCK_LOCKED) == qlcode)) Hmm. didn't read the patch, but this change looks like accidental typo... Oleg.

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Mar 02

1

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

On 02/26, Waiman Long wrote: > > @@ -144,7 +317,7 @@ static __always_inline int queue_spin_setlock(struct qspinlock *lock) > int qlcode = atomic_read(lock->qlcode); > > if (!(qlcode & _QSPINLOCK_LOCKED) && (atomic_cmpxchg(&lock->qlcode, > - qlcode, qlcode|_QSPINLOCK_LOCKED) == qlcode)) > + qlcode, code|_QSPINLOCK_LOCKED) == qlcode)) Hmm. didn't read the patch, but this change looks like accidental typo... Oleg.

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

2014 Mar 12

2

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

On 03/12/2014 02:54 PM, Waiman Long wrote: > + > + /* > + * Now wait until the lock bit is cleared > + */ > + while (smp_load_acquire(&qlock->qlcode)& _QSPINLOCK_LOCKED) > + arch_mutex_cpu_relax(); > + > + /* > + * Set the lock bit& clear the waiting bit simultaneously > + * It is assumed that there is no lock stealing with this > + * quick path active. > + * > + * A direct memory store of _QSPINLOCK_LOCKED into the > +...

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

2014 Mar 12

2

[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks

On 03/12/2014 02:54 PM, Waiman Long wrote: > + > + /* > + * Now wait until the lock bit is cleared > + */ > + while (smp_load_acquire(&qlock->qlcode)& _QSPINLOCK_LOCKED) > + arch_mutex_cpu_relax(); > + > + /* > + * Set the lock bit& clear the waiting bit simultaneously > + * It is assumed that there is no lock stealing with this > + * quick path active. > + * > + * A direct memory store of _QSPINLOCK_LOCKED into the > +...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Mar 03

5

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

...ruct mcs_spinlock *decode_tail(u32 code) { int cpu = (code >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (code >> _Q_TAIL_IDX_OFFSET) & _Q_TAIL_IDX_MASK; return per_cpu_ptr(&mcs_nodes[idx], cpu); } #define _QSPINLOCK_PENDING (1U << _Q_PENDING_OFFSET) #define _QSPINLOCK_MASK (_QSPINLOCK_LOCKED | _QSPINLOCK_PENDING) // PENDING - enables the pending bit logic // OPT - removes one atomic op at the cost of making pending a byte // OPT2 - replaces some cmpxchg loops with unconditional atomic ops // // PENDING looks to be a win, even with 2 atomic ops on Intel, and a loss on AMD // OPT...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Mar 03

5

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

...ruct mcs_spinlock *decode_tail(u32 code) { int cpu = (code >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (code >> _Q_TAIL_IDX_OFFSET) & _Q_TAIL_IDX_MASK; return per_cpu_ptr(&mcs_nodes[idx], cpu); } #define _QSPINLOCK_PENDING (1U << _Q_PENDING_OFFSET) #define _QSPINLOCK_MASK (_QSPINLOCK_LOCKED | _QSPINLOCK_PENDING) // PENDING - enables the pending bit logic // OPT - removes one atomic op at the cost of making pending a byte // OPT2 - replaces some cmpxchg loops with unconditional atomic ops // // PENDING looks to be a win, even with 2 atomic ops on Intel, and a loss on AMD // OPT...

[PATCH v5 0/8] qspinlock: a 4-byte queue spinlock with PV support

2014 Feb 27

14

[PATCH v5 0/8] qspinlock: a 4-byte queue spinlock with PV support

v4->v5: - Move the optimized 2-task contending code to the generic file to enable more architectures to use it without code duplication. - Address some of the style-related comments by PeterZ. - Allow the use of unfair queue spinlock in a real para-virtualized execution environment. - Add para-virtualization support to the qspinlock code by ensuring that the lock holder and queue

[PATCH v5 0/8] qspinlock: a 4-byte queue spinlock with PV support

2014 Feb 27

14

[PATCH v5 0/8] qspinlock: a 4-byte queue spinlock with PV support

v4->v5: - Move the optimized 2-task contending code to the generic file to enable more architectures to use it without code duplication. - Address some of the style-related comments by PeterZ. - Allow the use of unfair queue spinlock in a real para-virtualized execution environment. - Add para-virtualization support to the qspinlock code by ensuring that the lock holder and queue

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

2014 Feb 26

0

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

...ck *lock, int qsval); + +/** + * queue_spin_is_locked - is the spinlock locked? + * @lock: Pointer to queue spinlock structure + * Return: 1 if it is locked, 0 otherwise + */ +static __always_inline int queue_spin_is_locked(struct qspinlock *lock) +{ + return atomic_read(&lock->qlcode) & _QSPINLOCK_LOCKED; +} + +/** + * queue_spin_value_unlocked - is the spinlock structure unlocked? + * @lock: queue spinlock structure + * Return: 1 if it is unlocked, 0 otherwise + */ +static __always_inline int queue_spin_value_unlocked(struct qspinlock lock) +{ + return !(atomic_read(&lock.qlcode) & _QSPINL...

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

2014 Feb 27

0

[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation

...ck *lock, int qsval); + +/** + * queue_spin_is_locked - is the spinlock locked? + * @lock: Pointer to queue spinlock structure + * Return: 1 if it is locked, 0 otherwise + */ +static __always_inline int queue_spin_is_locked(struct qspinlock *lock) +{ + return atomic_read(&lock->qlcode) & _QSPINLOCK_LOCKED; +} + +/** + * queue_spin_value_unlocked - is the spinlock structure unlocked? + * @lock: queue spinlock structure + * Return: 1 if it is unlocked, 0 otherwise + */ +static __always_inline int queue_spin_value_unlocked(struct qspinlock lock) +{ + return !(atomic_read(&lock.qlcode) & _QSPINL...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 28

5

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

On Thu, Feb 27, 2014 at 03:42:19PM -0500, Waiman Long wrote: > >>+ old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); > >>+ > >>+ if (old == 0) { > >>+ /* > >>+ * Got the lock, can clear the waiting bit now > >>+ */ > >>+ smp_u8_store_release(&qlock->wait, 0); > > > >So we just did an atomic op, and now you're trying to optimize t...

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

2014 Feb 28

5

[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks

On Thu, Feb 27, 2014 at 03:42:19PM -0500, Waiman Long wrote: > >>+ old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); > >>+ > >>+ if (old == 0) { > >>+ /* > >>+ * Got the lock, can clear the waiting bit now > >>+ */ > >>+ smp_u8_store_release(&qlock->wait, 0); > > > >So we just did an atomic op, and now you're trying to optimize t...

search for: _qspinlock_locked