Displaying 20 results from an estimated 71 matches for "define_per_cpu_aligned".
2014 Feb 26
1
[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation
...wait; /* Waiting flag */
> + struct qnode *next; /* Next queue node addr */
> +};
> +
> +struct qnode_set {
> + struct qnode nodes[MAX_QNODES];
> + int node_idx; /* Current node to use */
> +};
> +
> +/*
> + * Per-CPU queue node structures
> + */
> +static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };
So I've not yet wrapped my head around any of this; and I see a later
patch adds some paravirt gunk to this, but it does blow that you can't
keep it to a single cacheline for the sane case.
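For reference, DEFINE_PER_CPU_ALIGNED places the variable in the cacheline-aligned per-CPU section, so the single-cacheline concern comes down to size arithmetic. A standalone sketch of that arithmetic, assuming MAX_QNODES is 4 (one node per nesting context) and a 64-bit build; the v5 field types are truncated in the excerpt above, so the layout here is only an approximation:

#include <stdio.h>

#define MAX_QNODES 4                    /* assumed: one node per context */

struct qnode {
        unsigned int wait;              /* waiting flag                  */
        struct qnode *next;             /* next queue node addr          */
};                                      /* padded to 16 bytes on 64-bit  */

struct qnode_set {
        struct qnode nodes[MAX_QNODES];
        int node_idx;                   /* current node to use           */
};                                      /* 4*16 + 4 -> 72 bytes padded   */

int main(void)
{
        /* Anything past 64 bytes straddles a second cacheline, and the
         * later paravirt additions only grow the structure further.    */
        printf("qnode=%zu qnode_set=%zu\n",
               sizeof(struct qnode), sizeof(struct qnode_set));
        return 0;
}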
2014 Feb 26
1
[PATCH v5 1/8] qspinlock: Introducing a 4-byte queue spinlock implementation
...wait; /* Waiting flag */
> + struct qnode *next; /* Next queue node addr */
> +};
> +
> +struct qnode_set {
> + struct qnode nodes[MAX_QNODES];
> + int node_idx; /* Current node to use */
> +};
> +
> +/*
> + * Per-CPU queue node structures
> + */
> +static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };
So I've not yet wrapped my head around any of this; and I see a later
patch adds some paravirt gunk to this, but it does blow that you can't
keep it to a single cacheline for the sane case.
2014 Mar 12
0
[PATCH v6 04/11] qspinlock: Optimized code path for 2 contending tasks
...Queue code */
};
+ u16 lock_wait; /* Lock and wait bits */
u32 qlcode; /* Complete lock word */
};
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 52d3580..0030fad 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -112,6 +112,8 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { { { 0 } }, 0 };
* o lock - the lock byte *
* o qcode - the queue node code *
* o qlcode - the 32-bit qspinlock word *
+ * o wait - the waiting byte *
+ * o lock_wait - the combined lock and waiting bytes *...
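The lock_wait field added here is a 16-bit alias covering the lock byte and the waiting byte at once. A little-endian sketch of how such an overlay can line up; the field names are taken from the excerpt, everything else (exact types, ordering) is an assumption rather than the patch's definition:

union qspinlock_sketch {
        struct {
                unsigned char  lock;    /* lock byte                     */
                unsigned char  wait;    /* waiting byte                  */
                unsigned short qcode;   /* queue node code               */
        };
        unsigned short lock_wait;       /* lock and wait bytes together  */
        unsigned int   qlcode;          /* complete 32-bit lock word     */
};

/* With this overlay, a 2-task contender can claim or release lock+wait
 * with a single 16-bit access instead of a full-word atomic.           */
_Static_assert(sizeof(union qspinlock_sketch) == 4, "one 32-bit word");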
2014 May 07
0
[PATCH v10 08/19] qspinlock: Make a new qnode structure to support virtualization
...ure. So
+ * a new queue node structure will have to be defined and used here.
+ */
+struct qnode {
+ struct mcs_spinlock mcs;
+};
+
+/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one cacheline.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[4]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -79,12 +88,12 @@ static inline u32 encode_tail(int cpu, int idx)
return tail;
}
-static inline struct mcs_spinlock *deco...
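The decode_tail() definition is cut off above; together with encode_tail() it maps a (cpu, idx) pair into the tail field and back. A standalone sketch of that mapping, with bit offsets assumed from the mainline layout rather than taken from this patch, and with the per-CPU pointer lookup that the real decode_tail() performs left out:

#define TAIL_IDX_OFFSET  16             /* assumed offsets and mask      */
#define TAIL_IDX_MASK    (0x3U << TAIL_IDX_OFFSET)
#define TAIL_CPU_OFFSET  18

/* The CPU number is stored +1 so that tail == 0 can mean "no tail"
 * while cpu 0 / idx 0 remains encodable (the 0:0 case noted above).    */
static inline unsigned int encode_tail(int cpu, int idx)
{
        return ((unsigned int)(cpu + 1) << TAIL_CPU_OFFSET) |
               ((unsigned int)idx << TAIL_IDX_OFFSET);
}

static inline void decode_tail(unsigned int tail, int *cpu, int *idx)
{
        *cpu = (int)(tail >> TAIL_CPU_OFFSET) - 1;
        *idx = (int)((tail & TAIL_IDX_MASK) >> TAIL_IDX_OFFSET);
}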
2015 Mar 16
0
[PATCH 8/9] qspinlock: Generic paravirt support
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -230,6 +241,32 @@ static __always_inline void set_locked(s
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}...
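The doubling the comment describes amounts to a configuration knob plus size arithmetic. A standalone sketch, assuming 16-byte MCS nodes on a 64-bit build; the per-node PV layout itself varies across versions of the series and is not reproduced here:

#define CONFIG_PARAVIRT_SPINLOCKS 1     /* assumption: PV build; drop for native */

#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define MAX_NODES 8                     /* two 64-byte cachelines per CPU */
#else
#define MAX_NODES 4                     /* task, softirq, hardirq, nmi    */
#endif

struct mcs_node {                       /* stand-in for mcs_spinlock      */
        struct mcs_node *next;
        int locked;
        int count;
};                                      /* 16 bytes on 64-bit             */

/* Per-CPU footprint is MAX_NODES * 16 bytes: 64 bytes natively, 128 bytes
 * with PV, leaving the second cacheline for the PV-only state so the
 * native layout stays untouched.                                        */
_Static_assert(sizeof(struct mcs_node) == 16,
               "MAX_NODES * 16 = 64 B native, 128 B with PV");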
2015 Mar 16
0
[PATCH 8/9] qspinlock: Generic paravirt support
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -230,6 +241,32 @@ static __always_inline void set_locked(s
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}...
2015 Apr 07
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -220,6 +231,33 @@ static __always_inline void set_locked(struct qspinlock *lock)
WRITE_ONCE(l->locked,...
2014 Jun 15
0
[PATCH 10/11] qspinlock: Paravirt support
...+{
+ return false;
+}
+#endif /* PARAVIRT_SPINLOCKS */
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one cacheline.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -218,6 +238,156 @@ static __always_inline void set_locked(s
ACCESS_ONCE(l->locked) = _Q_LOCKED_VAL;
}...
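The "+ return false; +}" lines at the top of this hunk are the tail of a stub for the non-paravirt build. A hedged sketch of that pattern; the function name below is assumed for illustration, not taken from the patch:

#include <stdbool.h>

#ifdef CONFIG_PARAVIRT_SPINLOCKS
bool pv_enabled(void);                  /* real hooks provided elsewhere   */
#else
static inline bool pv_enabled(void)
{
        return false;                   /* lets the compiler drop PV paths */
}
#endif

When the stub is compile-time false, the optimizer removes the PV-only branches from the native slowpath entirely, which is why the native case pays nothing for the paravirt support.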
2015 Apr 24
0
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -220,6 +231,32 @@ static __always_inline void set_locked(struct qspinlock *lock)
WRITE_ONCE(l->locked,...
2015 May 04
1
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -220,6 +231,32 @@ static __always_inline void set_locked(s
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}...
2015 May 04
1
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
...e MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -220,6 +231,32 @@ static __always_inline void set_locked(s
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}...
2015 Mar 18
2
[PATCH 8/9] qspinlock: Generic paravirt support
...queue node structures; we can never have more than 4 nested
> * contexts: task, softirq, hardirq, nmi.
> *
> * Exactly fits one 64-byte cacheline on a 64-bit architecture.
> + *
> + * PV doubles the storage and uses the second cacheline for PV state.
> */
> -static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
> +static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
>
> /*
> * We must be able to distinguish between no-tail and the tail at 0:0,
> @@ -230,6 +241,32 @@ static __always_inline void set_locked(s
> WRITE_ONCE(l->...
2015 Mar 18
2
[PATCH 8/9] qspinlock: Generic paravirt support
...queue node structures; we can never have more than 4 nested
> * contexts: task, softirq, hardirq, nmi.
> *
> * Exactly fits one 64-byte cacheline on a 64-bit architecture.
> + *
> + * PV doubles the storage and uses the second cacheline for PV state.
> */
> -static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
> +static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
>
> /*
> * We must be able to distinguish between no-tail and the tail at 0:0,
> @@ -230,6 +241,32 @@ static __always_inline void set_locked(s
> WRITE_ONCE(l->...
2014 Oct 16
2
[PATCH v12 09/11] pvqspinlock, x86: Add para-virtualization support
...PARAVIRT_SPINLOCKS */
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -228,6 +248,43 @@ static __always_inline void set_locked(struct qspinlock *lock)
ACCESS_ONCE(l->locked...
2014 Oct 16
2
[PATCH v12 09/11] pvqspinlock, x86: Add para-virtualization support
...PARAVIRT_SPINLOCKS */
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -228,6 +248,43 @@ static __always_inline void set_locked(struct qspinlock *lock)
ACCESS_ONCE(l->locked...
2014 Oct 29
1
[PATCH v13 09/11] pvqspinlock, x86: Add para-virtualization support
...MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV states.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -228,6 +237,33 @@ static __always_inline void set_locked(struct qspinlock *lock)
ACCESS_ONCE(l->locked...
2014 Oct 29
1
[PATCH v13 09/11] pvqspinlock, x86: Add para-virtualization support
...MAX_NODES 4
+#endif
+
/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV states.
*/
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
/*
* We must be able to distinguish between no-tail and the tail at 0:0,
@@ -228,6 +237,33 @@ static __always_inline void set_locked(struct qspinlock *lock)
ACCESS_ONCE(l->locked...
2014 Feb 26
0
[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks
...se
+# define _QCODE_OFFSET 16
+#endif
+
#define _QSPINLOCK_LOCKED 1U
#define _QSPINLOCK_LOCK_MASK 0xff
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ed5efa7..22a63fa 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -109,8 +109,11 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };
* 2) A smp_u8_store_release() macro for byte size store operation *
* 3) A "union arch_qspinlock" structure that include the individual *
* fields of the qspinlock structure, including: *
- * o slock - the qspinlock structure...
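Item 2 in the list above asks for a byte-sized store with release semantics, which is what lets the lock byte be cleared on unlock without a full-word atomic. A sketch of that primitive using the GCC/Clang __atomic builtins rather than the kernel's own macro; this is an assumption about the intent, not the patch's definition:

static inline void smp_u8_store_release(unsigned char *p, unsigned char v)
{
        /* Release ordering: every store before the unlock is visible
         * before the lock byte changes.  On x86, a plain byte store
         * already has this property, so this compiles to a single mov. */
        __atomic_store_n(p, v, __ATOMIC_RELEASE);
}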
2014 Feb 27
0
[PATCH v5 3/8] qspinlock, x86: Add x86 specific optimization for 2 contending tasks
...se
+# define _QCODE_OFFSET 16
+#endif
+
#define _QSPINLOCK_LOCKED 1U
#define _QSPINLOCK_LOCK_MASK 0xff
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ed5efa7..22a63fa 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -109,8 +109,11 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode_set, qnset) = { {{0}}, 0 };
* 2) A smp_u8_store_release() macro for byte size store operation *
* 3) A "union arch_qspinlock" structure that include the individual *
* fields of the qspinlock structure, including: *
- * o slock - the qspinlock structure...
2014 Mar 19
15
[PATCH v7 00/11] qspinlock: a 4-byte queue spinlock with PV support
v6->v7:
- Remove an atomic operation from the 2-task contending code
- Shorten the names of some macros
- Make the queue waiter attempt to steal the lock when unfair lock is
enabled.
- Remove lock holder kick from the PV code and fix a race condition
- Run the unfair lock & PV code on overcommitted KVM guests to collect
performance data.
v5->v6:
- Change the optimized