Displaying 13 results from an estimated 26 matches for "pv_hash".
2015 Apr 13
1
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...+	for (loop = SPIN_THRESHOLD; loop; loop--) {
> >>+		if (!READ_ONCE(l->locked))
> >>+			return;
> >>+
> >>+		cpu_relax();
> >>+	}
> >>+
> >>+	WRITE_ONCE(pn->state, vcpu_halted);
> >>+	if (!lp)
> >>+		lp = pv_hash(lock, pn);
> >>+	/*
> >>+	 * lp must be set before setting _Q_SLOW_VAL
> >>+	 *
> >>+	 * [S] lp = lock               [RmW] l = l->locked = 0
> >>+	 *     MB                            MB
> >>+	 * [S] l->locked = _Q_SLOW_VAL [L]...
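The comment above reads as a litmus test: the waiter's hash-insert store must be ordered before its _Q_SLOW_VAL store, pairing with the unlocker's fully ordered read-modify-write. A minimal self-contained C11 model of that pairing (stdatomic stands in for the kernel's smp_mb() and xchg(); lp_slot is a hypothetical stand-in for the real hash bucket, and the _Q_SLOW_VAL value is an assumed placeholder):

	#include <stdatomic.h>

	#define _Q_SLOW_VAL 3	/* assumed placeholder value */

	struct qspinlock { _Atomic int locked; };

	static _Atomic(struct qspinlock *) lp_slot;	/* models the hash bucket */

	/* waiter: [S] lp = lock; MB; [S] l->locked = _Q_SLOW_VAL */
	static void waiter(struct qspinlock *l)
	{
		atomic_store_explicit(&lp_slot, l, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);	/* MB */
		atomic_store_explicit(&l->locked, _Q_SLOW_VAL, memory_order_relaxed);
	}

	/* unlocker: [RmW] l->locked = 0; MB; [L] lp */
	static int unlocker(struct qspinlock *l)
	{
		int old = atomic_exchange(&l->locked, 0);	/* fully ordered RmW */

		if (old != _Q_SLOW_VAL)
			return 0;
		/* observing _Q_SLOW_VAL implies the hashed lp is visible too */
		return atomic_load_explicit(&lp_slot, memory_order_relaxed) == l;
	}

	int main(void)
	{
		struct qspinlock l = { 0 };

		waiter(&l);
		return !unlocker(&l);	/* exits 0: the lookup must succeed */
	}

Because the seq_cst fence synchronizes with the fully ordered exchange, an unlocker that reads _Q_SLOW_VAL is guaranteed to also see the hashed pointer.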
2015 Apr 09
6
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...uckets should be at least
> + * 256 to fully utilize a 4k page.
> + */
> +#define LFSR_MIN_BITS 8
> +#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
> +#if LFSR_MAX_BITS < LFSR_MIN_BITS
> +#undef LFSR_MAX_BITS
> +#define LFSR_MAX_BITS LFSR_MIN_BITS
> +#endif
> +
> +struct pv_hash_bucket {
> +	struct qspinlock *lock;
> +	struct pv_node *node;
> +};
> +#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
> +#define HB_RESERVED	((struct qspinlock *)1)
This is unused.
> +
> +static struct pv_hash_bucket *pv_lock_hash;
> +static un...
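The sizing arithmetic in this hunk is easy to check in isolation. A small compilable sketch under assumed values (SMP_CACHE_BYTES = 64 and NR_CPUS_BITS = 8 are assumptions here; the kernel derives both from its configuration):

	#include <stdio.h>

	#define SMP_CACHE_BYTES	64	/* assumed x86-64 cache line */
	#define NR_CPUS_BITS	8	/* assumed: NR_CPUS = 256 */

	#define LFSR_MIN_BITS	8	/* >= 256 buckets: fill a 4k page */
	#define LFSR_MAX_BITS	(2 + NR_CPUS_BITS)
	#if LFSR_MAX_BITS < LFSR_MIN_BITS
	#undef  LFSR_MAX_BITS
	#define LFSR_MAX_BITS	LFSR_MIN_BITS
	#endif

	struct pv_hash_bucket {
		void *lock;	/* struct qspinlock * in the real code */
		void *node;	/* struct pv_node * in the real code */
	};

	#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))

	int main(void)
	{
		printf("buckets per cache line: %zu\n", PV_HB_PER_LINE);
		printf("max table size: %lu buckets\n", 1UL << LFSR_MAX_BITS);
		return 0;
	}

On 64-bit, two pointers per bucket give 16 bytes, so four buckets share a cache line, and the 8-bit minimum yields the 256 buckets needed to fill a 4k page.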
2015 Apr 09
2
[PATCH v15 13/15] pvqspinlock: Only kick CPU at unlock time
...k into the hash table & set the _Q_SLOW_VAL in the lock.
> +	 * As this is the same CPU that will check the _Q_SLOW_VAL value and
> +	 * the hash table later on at unlock time, no atomic instruction is
> +	 * needed.
> +	 */
> +	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
> +	(void)pv_hash(lock, pn);
> }
This is broken. The unlock path relies on:
	pv_hash()
	MB
	l->locked = SLOW
such that when it observes SLOW, it must then also observe a consistent
bucket.
The above can have us do pv_hash_find() _before_ we actually hash the
lock, which will result in us triggering th...
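A kernel-style sketch of the ordering Peter describes, i.e. the mirror image of the quoted hunk (not standalone code; the explicit smp_mb() stands in for the barrier that a cmpxchg/xchg on ->locked would imply):

	/* publish the hash entry first... */
	(void)pv_hash(lock, pn);

	/* ...then a full barrier, so an unlocker that observes
	 * _Q_SLOW_VAL is guaranteed to find the hash bucket */
	smp_mb();

	WRITE_ONCE(l->locked, _Q_SLOW_VAL);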
2015 Apr 09
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...56 to fully utilize a 4k page.
>> + */
>> +#define LFSR_MIN_BITS 8
>> +#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
>> +#if LFSR_MAX_BITS < LFSR_MIN_BITS
>> +#undef LFSR_MAX_BITS
>> +#define LFSR_MAX_BITS LFSR_MIN_BITS
>> +#endif
>> +
>> +struct pv_hash_bucket {
>> +	struct qspinlock *lock;
>> +	struct pv_node *node;
>> +};
>> +#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
>> +#define HB_RESERVED	((struct qspinlock *)1)
> This is unused.
You are right, I will remove that.
>> +
...
2015 Mar 19
4
[PATCH 8/9] qspinlock: Generic paravirt support
...75, which is around the point where open addressing
+ * breaks down.
+ *
+ * Instead of probing just the immediate bucket we probe all buckets in the
+ * same cacheline.
+ *
+ * http://en.wikipedia.org/wiki/Hash_table#Open_addressing
+ *
+ */
+
+#define HB_RESERVED	((struct qspinlock *)1)
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	int cpu;
+};
+
+/*
+ * XXX dynamic allocate using nr_cpu_ids instead...
+ */
+#define PV_LOCK_HASH_BITS	(2 + NR_CPUS_BITS)
+
+#if PV_LOCK_HASH_BITS < 6
+#undef PV_LOCK_HASH_BITS
+#define PV_LOCK_HASH_BITS	6
+#endif
+
+#define PV_LOCK_HASH_SIZE	(1 << PV_...
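The "probe all buckets in the same cacheline" idea can be sketched in a few lines. A simplified single-threaded version (the real code claims slots with cmpxchg and rehashes when a line is full; probe_line and the bucket layout here are illustrative):

	#include <stddef.h>

	#define PV_HB_PER_LINE	4	/* assumed: 64-byte line / 16-byte bucket */

	struct bucket { void *lock; int cpu; };

	/* Probe every bucket sharing hash's cache line before giving up:
	 * all candidates sit in one line, so the scan costs one cache miss. */
	struct bucket *probe_line(struct bucket *tab, unsigned long hash, void *lock)
	{
		unsigned long base = hash & ~(unsigned long)(PV_HB_PER_LINE - 1);

		for (size_t i = 0; i < PV_HB_PER_LINE; i++) {
			struct bucket *hb = &tab[base + i];

			if (!hb->lock) {	/* empty slot: claim it */
				hb->lock = lock;
				return hb;
			}
		}
		return NULL;			/* line full: caller rehashes */
	}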
2015 Apr 07
0
[PATCH v15 13/15] pvqspinlock: Only kick CPU at unlock time
...ed even
+ * if it is not halted to avoid a potential race condition.
+ */
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed
 };
 struct pv_node {
@@ -97,7 +104,13 @@ static inline u32 hash_align(u32 hash)
 	return hash & ~(PV_HB_PER_LINE - 1);
 }
-static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
+/*
+ * Set up an entry in the lock hash table.
+ * This is not inlined to reduce the size of the generated code, as it is
+ * included twice and used only in the slowest path of handling CPU halting.
+ */
+static noinline struct qspinlock **
+pv_hash(struct q...
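To illustrate what the new vcpu_hashed state buys, here is a hedged, illustrative sketch of the "kick only at unlock time" idea (pv_kick_node_sketch is a hypothetical name, and the exact state transitions in the patch may differ):

	/* A queue head that has already hashed the lock advertises
	 * vcpu_hashed; the next-in-queue path then skips its kick and
	 * leaves the wakeup to the unlocker that sees _Q_SLOW_VAL. */
	static void pv_kick_node_sketch(struct pv_node *pn)
	{
		u8 old = cmpxchg(&pn->state, vcpu_halted, vcpu_running);

		if (old == vcpu_hashed)
			return;			/* deferred: unlock will kick */
		if (old == vcpu_halted)
			pv_kick(pn->cpu);	/* plain halted -> running kick */
	}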
2015 May 04
1
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
I changed it to the below; I've not gotten around to compiling or even
running it yet :-(
The biggest change is the pv_hash/pv_unhash functions, which I've
rewritten to hopefully be clearer (and hopefully without wrecking them).
I took out the cacheline-sized structure, which removes that double
loop and simplifies things. I've also added some comments which
hopefully explain how/why we ended up with this exact...
2015 Apr 07
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
...e
+ * granularity. So the minimum number of hash buckets should be at least
+ * 256 to fully utilize a 4k page.
+ */
+#define LFSR_MIN_BITS 8
+#define LFSR_MAX_BITS (2 + NR_CPUS_BITS)
+#if LFSR_MAX_BITS < LFSR_MIN_BITS
+#undef LFSR_MAX_BITS
+#define LFSR_MAX_BITS LFSR_MIN_BITS
+#endif
+
+struct pv_hash_bucket {
+	struct qspinlock *lock;
+	struct pv_node *node;
+};
+#define PV_HB_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
+#define HB_RESERVED	((struct qspinlock *)1)
+
+static struct pv_hash_bucket *pv_lock_hash;
+static unsigned int pv_lock_hash_bits __read_mostly;
+
+#include <...
2015 Apr 02
3
[PATCH 8/9] qspinlock: Generic paravirt support
...that guarantees that if we observe
_Q_SLOW_VAL we must also observe the hash bucket with the lock value.
> So we need to have
> some kind of synchronization mechanism to let the lookup CPU know when is a
> good time to look up.
No, it's all already ordered and working.
pv_wait_head():

	pv_hash()
	/* MB as per cmpxchg */
	cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL);

VS

__pv_queue_spin_unlock():

	if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
		return;

	/* MB as per xchg */
	pv_hash_find(lock);
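Putting the two columns together, a kernel-style sketch of the unlock slow path this pairing enables (names as used in the thread; pv_kick() is the paravirt wakeup wrapper, and the snippet is illustrative rather than the exact patch text):

	__visible void __pv_queue_spin_unlock(struct qspinlock *lock)
	{
		struct __qspinlock *l = (void *)lock;
		struct pv_node *node;

		/* fully ordered RmW: pairs with the cmpxchg above */
		if (xchg(&l->locked, 0) != _Q_SLOW_VAL)
			return;		/* nobody hashed this lock */

		/* seeing _Q_SLOW_VAL means pv_hash() is visible */
		node = pv_hash_find(lock);
		pv_kick(node->cpu);
	}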
2015 Apr 07
18
[PATCH v15 00/15] qspinlock: a 4-byte queue spinlock with PV support
v14->v15:
- Incorporate PeterZ's v15 qspinlock patch and improve upon the PV
qspinlock code by dynamically allocating the hash table as well
as some other performance optimizations.
- Simplified the Xen PV qspinlock code as suggested by David Vrabel
<david.vrabel at citrix.com>.
- Add benchmarking data for 3.19 kernel to compare the performance
of a spinlock heavy test
2015 Apr 24
0
[PATCH v16 08/14] pvqspinlock: Implement simple paravirt support for the qspinlock
...6 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
+ *
+ * Since we should not be holding locks from NMI context (very rare indeed) the
+ * max load factor is 0.75, which is around the point where open addressing
+ * breaks down.
+ *
+ */
+#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
+#define PV_HB_MIN (PAGE_SIZE / sizeof(struct pv_hash_bucket))
+
+struct pv_hash_entry {
+	struct qspinlock *lock;
+	struct pv_node *node;
+};
+
+struct pv_hash_bucket {
+	struct pv_hash_entry ent[PV_HE_PER_LINE];
+};
+
+static struct pv_hash_bucket *pv_lock_hash;
+static unsigned int pv_...
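With the cacheline-nested structure, insertion amounts to claiming an entry in a flat array. A self-contained C11 sketch of that shape, simplified to plain linear probing (the real pv_hash() probes a cache line's worth of entries at a time; all names here are illustrative):

	#include <stdatomic.h>
	#include <stddef.h>

	#define HASH_BITS	8
	#define HASH_SIZE	(1UL << HASH_BITS)

	struct entry {
		_Atomic(void *) lock;
		void *node;
	};
	static struct entry tab[HASH_SIZE];

	/* Claim the first free entry with a CAS, then publish the node.
	 * The real code orders this insert before setting _Q_SLOW_VAL. */
	static struct entry *hash_insert(void *lock, void *node, unsigned long hash)
	{
		for (size_t i = 0; i < HASH_SIZE; i++) {
			struct entry *he = &tab[(hash + i) & (HASH_SIZE - 1)];
			void *expect = NULL;

			if (atomic_compare_exchange_strong(&he->lock, &expect, lock)) {
				he->node = node;
				return he;
			}
		}
		return NULL;	/* unreachable while the load factor stays below 1 */
	}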
2015 Apr 09
0
[PATCH v15 09/15] pvqspinlock: Implement simple paravirt support for the qspinlock
On Thu, Apr 09, 2015 at 08:13:27PM +0200, Peter Zijlstra wrote:
> On Mon, Apr 06, 2015 at 10:55:44PM -0400, Waiman Long wrote:
> > +#define PV_HB_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_bucket))
> > +static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
> > +{
> > +	unsigned long init_hash, hash = hash_ptr(lock, pv_lock_hash_bits);
> > +	struct pv_hash_bucket *hb, *end;
> > +
> > +	if (!hash)
> > +		hash = 1;
>...
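The `if (!hash) hash = 1;` guard exists because the probe sequence is generated by an LFSR, and 0 is a fixed point of any LFSR: starting from 0, the probe would never advance. A small sketch with an illustrative 8-bit maximal-length tap mask (the mask in the actual patch may differ):

	/* Galois LFSR step; 0xB8 encodes x^8 + x^6 + x^5 + x^4 + 1, which is
	 * maximal: it cycles through all 255 non-zero 8-bit values. Since
	 * lfsr_step(0) stays 0, the hash must be forced non-zero first. */
	static unsigned int lfsr_step(unsigned int v)
	{
		unsigned int lsb = v & 1;

		v >>= 1;
		if (lsb)
			v ^= 0xB8;
		return v;
	}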
2015 Mar 18
2
[PATCH 8/9] qspinlock: Generic paravirt support
On 03/16/2015 09:16 AM, Peter Zijlstra wrote:
> Implement simple paravirt support for the qspinlock.
>
> Provide a separate (second) version of the spin_lock_slowpath for
> paravirt along with a special unlock path.
>
> The second slowpath is generated by adding a few pv hooks to the
> normal slowpath, but where those will compile away for the native
> case, they expand