thr3ads.net - search: "_q_pending

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

2014 May 08

1

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

On Wed, May 07, 2014 at 11:01:35AM -0400, Waiman Long wrote: > @@ -94,23 +94,29 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) > * can allow better optimization of the lock acquisition for the pending > * bit holder. > */ > -#if _Q_PENDING_BITS == 8 > - > struct __qspinlock { > union { > atomic_t val; > - struct { > #ifdef __LITTLE_ENDIAN > + u8 locked; > + struct { > u16 locked_pending; > u16 tail; > + }; > #else > + struct { > u16 tail; > u16 locked_pending; >...

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

2014 May 08

1

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

On Wed, May 07, 2014 at 11:01:35AM -0400, Waiman Long wrote: > @@ -94,23 +94,29 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) > * can allow better optimization of the lock acquisition for the pending > * bit holder. > */ > -#if _Q_PENDING_BITS == 8 > - > struct __qspinlock { > union { > atomic_t val; > - struct { > #ifdef __LITTLE_ENDIAN > + u8 locked; > + struct { > u16 locked_pending; > u16 tail; > + }; > #else > + struct { > u16 tail; > u16 locked_pending; >...

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

2014 Jun 15

0

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

...* + * When NR_CPUS >= 16K * 0- 7: locked byte * 8: pending * 9-10: tail index @@ -50,7 +58,11 @@ typedef struct qspinlock { #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#if CONFIG_NR_CPUS < (1U << 14) +#define _Q_PENDING_BITS 8 +#else #define _Q_PENDING_BITS 1 +#endif #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) @@ -61,6 +73,7 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAI...

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

2014 Apr 17

0

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

...* + * When NR_CPUS >= 16K * 0- 7: locked byte * 8: pending * 9-10: tail index @@ -50,7 +58,11 @@ typedef struct qspinlock { #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#if CONFIG_NR_CPUS < (1U << 14) +#define _Q_PENDING_BITS 8 +#else #define _Q_PENDING_BITS 1 +#endif #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) @@ -61,6 +73,7 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAI...

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

2014 Apr 17

1

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

...* instead of its node; whereby avoiding the need to carry a node from lock to > * unlock, and preserving API. > + * > + * N.B. The current implementation only supports architectures that allow > + * atomic operations on smaller 8-bit and 16-bit data types. > */ Only for the _Q_PENDING_BITS == 8 case, the other case should still be fine.

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

2014 Apr 17

1

[PATCH v9 05/19] qspinlock: Optimize for smaller NR_CPUS

...* instead of its node; whereby avoiding the need to carry a node from lock to > * unlock, and preserving API. > + * > + * N.B. The current implementation only supports architectures that allow > + * atomic operations on smaller 8-bit and 16-bit data types. > */ Only for the _Q_PENDING_BITS == 8 case, the other case should still be fine.

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

2014 Jun 18

1

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

...locked byte > * 8: pending > * 9-10: tail index > @@ -50,7 +58,11 @@ typedef struct qspinlock { > #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) > > #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) > +#if CONFIG_NR_CPUS < (1U << 14) > +#define _Q_PENDING_BITS 8 > +#else > #define _Q_PENDING_BITS 1 > +#endif > #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) > > #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) > @@ -61,6 +73,7 @@ typedef struct qspinlock { > #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) &...

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

2014 Jun 18

1

[PATCH 05/11] qspinlock: Optimize for smaller NR_CPUS

...locked byte > * 8: pending > * 9-10: tail index > @@ -50,7 +58,11 @@ typedef struct qspinlock { > #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) > > #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) > +#if CONFIG_NR_CPUS < (1U << 14) > +#define _Q_PENDING_BITS 8 > +#else > #define _Q_PENDING_BITS 1 > +#endif > #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) > > #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) > @@ -61,6 +73,7 @@ typedef struct qspinlock { > #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) &...

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

2014 Jun 15

0

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

...nline struct mcs_spinlock *decod * By using the whole 2nd least significant byte for the pending bit, we * can allow better optimization of the lock acquisition for the pending * bit holder. + * + * This internal structure is also used by the set_locked function which + * is not restricted to _Q_PENDING_BITS == 8. */ -#if _Q_PENDING_BITS == 8 - struct __qspinlock { union { atomic_t val; - struct { #ifdef __LITTLE_ENDIAN + u8 locked; + struct { u16 locked_pending; u16 tail; + }; #else + struct { u16 tail; u16 locked_pending; -#endif }; + struct { + u8 reserved[3]; +...

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

2014 Jun 18

1

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

...od > * By using the whole 2nd least significant byte for the pending bit, we > * can allow better optimization of the lock acquisition for the pending > * bit holder. > + * > + * This internal structure is also used by the set_locked function which > + * is not restricted to _Q_PENDING_BITS == 8. > */ > -#if _Q_PENDING_BITS == 8 > - > struct __qspinlock { > union { > atomic_t val; > - struct { > #ifdef __LITTLE_ENDIAN > + u8 locked; > + struct { > u16 locked_pending; > u16 tail; > + }; > #else > + struct { > u...

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

2014 Jun 18

1

[PATCH 07/11] qspinlock: Use a simple write to grab the lock, if applicable

...od > * By using the whole 2nd least significant byte for the pending bit, we > * can allow better optimization of the lock acquisition for the pending > * bit holder. > + * > + * This internal structure is also used by the set_locked function which > + * is not restricted to _Q_PENDING_BITS == 8. > */ > -#if _Q_PENDING_BITS == 8 > - > struct __qspinlock { > union { > atomic_t val; > - struct { > #ifdef __LITTLE_ENDIAN > + u8 locked; > + struct { > u16 locked_pending; > u16 tail; > + }; > #else > + struct { > u...

[PATCH v9 07/19] qspinlock: Use a simple write to grab the lock, if applicable

2014 Apr 17

0

[PATCH v9 07/19] qspinlock: Use a simple write to grab the lock, if applicable

...kernel/locking/qspinlock.c index 497da24..80fe9ee 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -98,23 +98,29 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) * can allow better optimization of the lock acquisition for the pending * bit holder. */ -#if _Q_PENDING_BITS == 8 - struct __qspinlock { union { atomic_t val; - struct { #ifdef __LITTLE_ENDIAN + u8 locked; + struct { u16 locked_pending; u16 tail; + }; #else + struct { u16 tail; u16 locked_pending; -#endif }; + struct { + u8 reserved[3]; + u8 locked; + }; +#endif };...

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

2014 May 07

0

[PATCH v10 07/19] qspinlock: Use a simple write to grab the lock, if applicable

...kernel/locking/qspinlock.c index e734acb..0ee1a23 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -94,23 +94,29 @@ static inline struct mcs_spinlock *decode_tail(u32 tail) * can allow better optimization of the lock acquisition for the pending * bit holder. */ -#if _Q_PENDING_BITS == 8 - struct __qspinlock { union { atomic_t val; - struct { #ifdef __LITTLE_ENDIAN + u8 locked; + struct { u16 locked_pending; u16 tail; + }; #else + struct { u16 tail; u16 locked_pending; -#endif }; + struct { + u8 reserved[3]; + u8 locked; + }; +#endif };...

[PATCH 00/11] qspinlock with paravirt support

2014 Jun 15

28

[PATCH 00/11] qspinlock with paravirt support

Since Waiman seems incapable of doing simple things; here's my take on the paravirt crap. The first few patches are taken from Waiman's latest series, but the virt support is completely new. Its primary aim is to not mess up the native code. I've not stress tested it, but the virt and paravirt (kvm) cases boot on simple smp guests. I've not done Xen, but the patch should be

[PATCH 00/11] qspinlock with paravirt support

2014 Jun 15

28

[PATCH 00/11] qspinlock with paravirt support

Since Waiman seems incapable of doing simple things; here's my take on the paravirt crap. The first few patches are taken from Waiman's latest series, but the virt support is completely new. Its primary aim is to not mess up the native code. I've not stress tested it, but the virt and paravirt (kvm) cases boot on simple smp guests. I've not done Xen, but the patch should be

[PATCH 03/11] qspinlock: Add pending bit

2014 Jun 15

0

[PATCH 03/11] qspinlock: Add pending bit

...<< _Q_ ## type ## _OFFSET) @@ -48,7 +49,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -57,5 +62,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define...

[PATCH v9 03/19] qspinlock: Add pending bit

2014 Apr 17

0

[PATCH v9 03/19] qspinlock: Add pending bit

...<< _Q_ ## type ## _OFFSET) @@ -48,7 +49,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -57,5 +62,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define...

[PATCH v10 03/19] qspinlock: Add pending bit

2014 May 07

0

[PATCH v10 03/19] qspinlock: Add pending bit

...<< _Q_ ## type ## _OFFSET) @@ -48,7 +49,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -57,5 +62,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define...

[PATCH 3/9] qspinlock: Add pending bit

2015 Mar 16

0

[PATCH 3/9] qspinlock: Add pending bit

...<< _Q_ ## type ## _OFFSET) @@ -45,7 +46,11 @@ typedef struct qspinlock { #define _Q_LOCKED_BITS 8 #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) -#define _Q_TAIL_IDX_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#define _Q_PENDING_BITS 1 +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) #define _Q_TAIL_IDX_BITS 2 #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) @@ -54,5 +59,6 @@ typedef struct qspinlock { #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) #define...

[PATCH v9 06/19] qspinlock: prolong the stay in the pending bit path

2014 Apr 17

2

[PATCH v9 06/19] qspinlock: prolong the stay in the pending bit path

...will be cleared. > + * If that is no change, we return and be queued. > + */ > + if (!retry) > + return 0; > + retry--; > + cpu_relax(); > + cpu_relax(); > + *pval = val = atomic_read(&lock->val); > + continue; Since you gave up optimizing the _Q_PENDING_BITS != 8 case why bother with this? The switch from _Q_PENDING_VAL to _Q_LOCKED_VAL is atomic by virtue of your (endian challenged) clear_pending_set_locked(). > + } else if ((val & _Q_LOCKED_PENDING_MASK) == _Q_PENDING_VAL) { > + /* > + * Pending bit is set, but not the lock bit. &...

search for: _q_pending_bits