Displaying 13 results from an estimated 26 matches for "pv_hash".
2015 Apr 13
1
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...+	for (loop = SPIN_THRESHOLD; loop; loop--) {
> >>+		if (!READ_ONCE(l->locked))
> >>+			return;
> >>+
> >>+		cpu_relax();
> >>+	}
> >>+
> >>+	WRITE_ONCE(pn->state, vcpu_halted);
> >>+	if (!lp)
> >>+		lp = pv_hash(lock, pn);
> >>+	/*
> >>+	 * lp must be set before setting _Q_SLOW_VAL
> >>+	 *
> >>+	 * [S] lp = lock               [RmW] l = l->locked = 0
> >>+	 *     MB                            MB
> >>+	 * [S] l->locked = _Q_SLOW_VAL [L]...
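The comment above reads as a litmus test: the waiter's hash-insert store must be ordered before its _Q_SLOW_VAL store, pairing with the unlocker's fully ordered read-modify-write. A minimal self-contained C11 model of that pairing (stdatomic stands in for the kernel's smp_mb() and xchg(); lp_slot is a hypothetical stand-in for the real hash bucket, and the _Q_SLOW_VAL value is an assumed placeholder):

	#include <stdatomic.h>

	#define _Q_SLOW_VAL 3	/* assumed placeholder value */

	struct qspinlock { _Atomic int locked; };

	static _Atomic(struct qspinlock *) lp_slot;	/* models the hash bucket */

	/* waiter: [S] lp = lock; MB; [S] l->locked = _Q_SLOW_VAL */
	static void waiter(struct qspinlock *l)
	{
		atomic_store_explicit(&lp_slot, l, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);	/* MB */
		atomic_store_explicit(&l->locked, _Q_SLOW_VAL, memory_order_relaxed);
	}

	/* unlocker: [RmW] l->locked = 0; MB; [L] lp */
	static int unlocker(struct qspinlock *l)
	{
		int old = atomic_exchange(&l->locked, 0);	/* fully ordered RmW */

		if (old != _Q_SLOW_VAL)
			return 0;
		/* observing _Q_SLOW_VAL implies the hashed lp is visible too */
		return atomic_load_explicit(&lp_slot, memory_order_relaxed) == l;
	}

	int main(void)
	{
		struct qspinlock l = { 0 };

		waiter(&l);
		return !unlocker(&l);	/* exits 0: the lookup must succeed */
	}

Because the seq_cst fence synchronizes with the fully ordered exchange, an unlocker that reads _Q_SLOW_VAL is guaranteed to also see the hashed pointer.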
2015 Apr 09
6
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...uckets should be at least
> + * 256 to fully utilize a 4k page.
> + */
> +#define LFSR_MIN_BITS 8
> +#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
> +#if LFSR_MAX_BITS < LFSR_MIN_BITS
> +#undef LFSR_MAX_BITS
> +#define LFSR_MAX_BITS LFSR_MIN_BITS
> +#endif
> +
> +struct pv_hash_bucket {
> +	struct qspinlock *lock;
> +	struct pv_node *node;
> +};
> +#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
> +#define HB_RESERVED	((struct qspinlock *)1)
This is unused.
> +
> +static struct pv_hash_bucket *pv_lock_hash;
> +static un...
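The sizing arithmetic in this hunk is easy to check in isolation. A small compilable sketch under assumed values (SMP_CACHE_BYTES = 64 and NR_CPUS_BITS = 8 are assumptions here; the kernel derives both from its configuration):

	#include <stdio.h>

	#define SMP_CACHE_BYTES	64	/* assumed x86-64 cache line */
	#define NR_CPUS_BITS	8	/* assumed: NR_CPUS = 256 */

	#define LFSR_MIN_BITS	8	/* >= 256 buckets: fill a 4k page */
	#define LFSR_MAX_BITS	(2 + NR_CPUS_BITS)
	#if LFSR_MAX_BITS < LFSR_MIN_BITS
	#undef  LFSR_MAX_BITS
	#define LFSR_MAX_BITS	LFSR_MIN_BITS
	#endif

	struct pv_hash_bucket {
		void *lock;	/* struct qspinlock * in the real code */
		void *node;	/* struct pv_node * in the real code */
	};

	#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))

	int main(void)
	{
		printf("buckets per cache line: %zu\n", PV_HB_PER_LINE);
		printf("max table size: %lu buckets\n", 1UL << LFSR_MAX_BITS);
		return 0;
	}

On 64-bit, two pointers per bucket give 16 bytes, so four buckets share a cache line, and the 8-bit minimum yields the 256 buckets needed to fill a 4k page.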
2015 Apr 09
2
[PATCH v15 13/15] pvqspinlock: Only kick CPU at unlock time
...k into the hash table & set the _Q_SLOW_VAL in the lock.
> +	 * As this is the same CPU that will check the _Q_SLOW_VAL value and
> +	 * the hash table later on at unlock time, no atomic instruction is
> +	 * needed.
> +	 */
> +	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
> +	(void)pv_hash(lock, pn);
> }
This is broken. The unlock path relies on:
	pv_hash()
	MB
	l->locked = SLOW
such that when it observes SLOW, it must then also observe a consistent
bucket.
The above can have us do pv_hash_find() _before_ we actually hash the
lock, which will result in us triggering th...
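A kernel-style sketch of the ordering Peter describes, i.e. the mirror image of the quoted hunk (not standalone code; the explicit smp_mb() stands in for the barrier that a cmpxchg/xchg on ->locked would imply):

	/* publish the hash entry first... */
	(void)pv_hash(lock, pn);

	/* ...then a full barrier, so an unlocker that observes
	 * _Q_SLOW_VAL is guaranteed to find the hash bucket */
	smp_mb();

	WRITE_ONCE(l->locked, _Q_SLOW_VAL);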
2015 Apr 09
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...56 to fully utilize a 4k page.
>> + */
>> +#define LFSR_MIN_BITS 8
>> +#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
>> +#if LFSR_MAX_BITS < LFSR_MIN_BITS
>> +#undef LFSR_MAX_BITS
>> +#define LFSR_MAX_BITS LFSR_MIN_BITS
>> +#endif
>> +
>> +struct pv_hash_bucket {
>> +	struct qspinlock *lock;
>> +	struct pv_node *node;
>> +};
>> +#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
>> +#define HB_RESERVED	((struct qspinlock *)1)
> This is unused.
You are right, I will remove that.
>> +
...
2015 Mar 19
4
[PATCH 8/9] qspinlock: Generic paravirt support
...75, which is around the point where open addressing
+ * breaks down.
+ *
+ * Instead of probing just the immediate bucket we probe all buckets in the
+ * same cacheline.
+ *
+ * http://en.wikipedia.org/wiki/Hash_table#Open_addressing
+ *
+ */
+
+#define HB_RESERVED	((struct qspinlock *)1)
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	int cpu;
+};
+
+/*
+ * XXX dynamic allocate using nr_cpu_ids instead...
+ */
+#define PV_LOCK_HASH_BITS	(2 + NR_CPUS_BITS)
+
+#if PV_LOCK_HASH_BITS < 6
+#undef PV_LOCK_HASH_BITS
+#define PV_LOCK_HASH_BITS	6
+#endif
+
+#define PV_LOCK_HASH_SIZE	(1 << PV_...
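The "probe all buckets in the same cacheline" idea can be sketched in a few lines. A simplified single-threaded version (the real code claims slots with cmpxchg and rehashes when a line is full; probe_line and the bucket layout here are illustrative):

	#include <stddef.h>

	#define PV_HB_PER_LINE	4	/* assumed: 64-byte line / 16-byte bucket */

	struct bucket { void *lock; int cpu; };

	/* Probe every bucket sharing hash's cache line before giving up:
	 * all candidates sit in one line, so the scan costs one cache miss. */
	struct bucket *probe_line(struct bucket *tab, unsigned long hash, void *lock)
	{
		unsigned long base = hash & ~(unsigned long)(PV_HB_PER_LINE - 1);

		for (size_t i = 0; i < PV_HB_PER_LINE; i++) {
			struct bucket *hb = &tab[base + i];

			if (!hb->lock) {	/* empty slot: claim it */
				hb->lock = lock;
				return hb;
			}
		}
		return NULL;			/* line full: caller rehashes */
	}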
2015 Apr 07
0
[PATCH v15 13/15] pvqspinlock: Only kick CPU at unlock time
...ed even
+ * if it is not halted to avoid a potential race condition.
+ */
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed
 };
 struct pv_node {
@@ -97,7 +104,13 @@ static inline u32 hash_align(u32 hash)
 	return hash & ~(PV_HB_PER_LINE - 1);
 }
-static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
+/*
+ * Set up an entry in the lock hash table.
+ * This is not inlined to reduce the size of the generated code, as it is
+ * included twice and used only in the slowest path of handling CPU halting.
+ */
+static noinline struct qspinlock **
+pv_hash(struct q...
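To illustrate what the new vcpu_hashed state buys, here is a hedged, illustrative sketch of the "kick only at unlock time" idea (pv_kick_node_sketch is a hypothetical name, and the exact state transitions in the patch may differ):

	/* A queue head that has already hashed the lock advertises
	 * vcpu_hashed; the next-in-queue path then skips its kick and
	 * leaves the wakeup to the unlocker that sees _Q_SLOW_VAL. */
	static void pv_kick_node_sketch(struct pv_node *pn)
	{
		u8 old = cmpxchg(&pn->state, vcpu_halted, vcpu_running);

		if (old == vcpu_hashed)
			return;			/* deferred: unlock will kick */
		if (old == vcpu_halted)
			pv_kick(pn->cpu);	/* plain halted -> running kick */
	}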
2015 May 04
1
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
I changed it to the below; I've not gotten around to compiling or even
running it yet :-(
The biggest change is the pv_hash/pv_unhash functions, which I've
rewritten to hopefully be clearer (and hopefully without wrecking them).
I took out the cacheline-sized structure, which removes that double
loop and simplifies things. I've also added some comments which
hopefully explain how/why we ended up with this exact...
2015 Apr 07
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...e
+ * granularity. So the minimum number of hash buckets should be at least
+ * 256 to fully utilize a 4k page.
+ */
+#define LFSR_MIN_BITS 8
+#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
+#if LFSR_MAX_BITS < LFSR_MIN_BITS
+#undef LFSR_MAX_BITS
+#define LFSR_MAX_BITS LFSR_MIN_BITS
+#endif
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	struct pv_node *node;
+};
+#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
+#define HB_RESERVED	((struct qspinlock *)1)
+
+static struct pv_hash_bucket *pv_lock_hash;
+static unsigned int pv_lock_hash_bits __read_mostly;
+
+#include <...
2015 Apr 02
3
[PATCH 8/9] qspinlock: Generic paravirt support
...that guarantees that if we observe
_Q_SLOW_VAL we must also observe the hash bucket with the lock value.
> So we need to have
> some kind of synchronization mechanism to let the lookup CPU know when is a
> good time to look up.
No, it's all already ordered and working.
pv_wait_head():

	pv_hash()
	/* MB as per cmpxchg */
	cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL);

VS

__pv_queue_spin_unlock():

	if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
		return;

	/* MB as per xchg */
	pv_hash_find(lock);
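Putting the two columns together, a kernel-style sketch of the unlock slow path this pairing enables (names as used in the thread; pv_kick() is the paravirt wakeup wrapper, and the snippet is illustrative rather than the exact patch text):

	__visible void __pv_queue_spin_unlock(struct qspinlock *lock)
	{
		struct __qspinlock *l = (void *)lock;
		struct pv_node *node;

		/* fully ordered RmW: pairs with the cmpxchg above */
		if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
			return;		/* nobody hashed this lock */

		/* seeing _Q_SLOW_VAL means pv_hash() is visible */
		node = pv_hash_find(lock);
		pv_kick(node->cpu);
	}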
2015 Apr 07
18
[PATCH v15 00/15] qspinlock: a 4-byte queue spinlock with PV support
v14->v15:
- Incorporate PeterZ's v15 qspinlock patch and improve upon the PV
qspinlock code by dynamically allocating the hash table as well
as some other performance optimizations.
- Simplified the Xen PV qspinlock code as suggested by David Vrabel
<david.vrabel at citrix.com>.
- Add benchmarking data for 3.19 kernel to compare the performance
of a spinlock heavy test
2015 Apr 24
0
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
...6 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
+ *
+ * Since we should not be holding locks from NMI context (very rare indeed) the
+ * max load factor is 0.75, which is around the point where open addressing
+ * breaks down.
+ *
+ */
+#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
+#define PV_HB_MIN (PAGE_SIZE / sizeof(struct pv_hash_bucket))
+
+struct pv_hash_entry {
+	struct qspinlock *lock;
+	struct pv_node *node;
+};
+
+struct pv_hash_bucket {
+	struct pv_hash_entry ent[PV_HE_PER_LINE];
+};
+
+static struct pv_hash_bucket *pv_lock_hash;
+static unsigned int pv_...
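With the cacheline-nested structure, insertion amounts to claiming an entry in a flat array. A self-contained C11 sketch of that shape, simplified to plain linear probing (the real pv_hash() probes a cache line's worth of entries at a time; all names here are illustrative):

	#include <stdatomic.h>
	#include <stddef.h>

	#define HASH_BITS	8
	#define HASH_SIZE	(1UL << HASH_BITS)

	struct entry {
		_Atomic(void *) lock;
		void *node;
	};
	static struct entry tab[HASH_SIZE];

	/* Claim the first free entry with a CAS, then publish the node.
	 * The real code orders this insert before setting _Q_SLOW_VAL. */
	static struct entry *hash_insert(void *lock, void *node, unsigned long hash)
	{
		for (size_t i = 0; i < HASH_SIZE; i++) {
			struct entry *he = &tab[(hash + i) & (HASH_SIZE - 1)];
			void *expect = NULL;

			if (atomic_compare_exchange_strong(&he->lock, &expect, lock)) {
				he->node = node;
				return he;
			}
		}
		return NULL;	/* unreachable while the load factor stays below 1 */
	}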
2015 Apr 09
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
On Thu, Apr 09, 2015 at 08:13:27PM +0200, Peter Zijlstra wrote:
> On Mon, Apr 06, 2015 at 10:55:44PM -0400, Waiman Long wrote:
> > +#define PV_HB_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
> > +static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
> > +{
> > +	unsigned long init_hash, hash = hash_ptr(lock, pv_lock_hash_bits);
> > +	struct pv_hash_bucket *hb, *end;
> > +
> > +	if (!hash)
> > +		hash = 1;
>...
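The `if (!hash) hash = 1;` guard exists because the probe sequence is generated by an LFSR, and 0 is a fixed point of any LFSR: starting from 0, the probe would never advance. A small sketch with an illustrative 8-bit maximal-length tap mask (the mask in the actual patch may differ):

	/* Galois LFSR step; 0xB8 encodes x^8 + x^6 + x^5 + x^4 + 1, which is
	 * maximal: it cycles through all 255 non-zero 8-bit values. Since
	 * lfsr_step(0) stays 0, the hash must be forced non-zero first. */
	static unsigned int lfsr_step(unsigned int v)
	{
		unsigned int lsb = v & 1;

		v >>= 1;
		if (lsb)
			v ^= 0xB8;
		return v;
	}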
2015 Mar 18
2
[PATCH 8/9] qspinlock: Generic paravirt support
On 03/16/2015 09:16 AM, Peter Zijlstra wrote:
> Implement simple paravirt support for the qspinlock.
>
> Provide a separate (second) version of the spin_lock_slowpath for
> paravirt along with a special unlock path.
>
> The second slowpath is generated by adding a few pv hooks to the
> normal slowpath, but where those will compile away for the native
> case, they expand