Jan Beulich
2011-Mar-25 17:01 UTC
[Xen-devel] [PATCH] x86: improve scalability of rwlock implementation
>From an abstract perspective, it should be possible to replace normalspin locks with the write version of rwlocks without correctness issues. The implementation so far had a very small probability of not scaling beyond 128 CPUs (which would become more significant if aforementioned replacement was done), since the sign bit of a 32-bit integer is being used to determine various conditions, and 129 or more CPUs simultaneously subtracting RW_LOCK_BIAS from a single lock would cause an undetected overflow. Therefore, the bias gets adjusted so that it can cover up to 2048 CPUs, and the lock structure gets widened to account for the unlikely case of getting built for and run on a system with even more CPUs. While touching this code, I also used the same .subsection approach that was already used elsewhere to eliminate taken branches from the fast paths. And the output constraints of the affected asm()-s got fixed to use "+" rather than "=". Signed-off-by: Jan Beulich <jbeulich@novell.com> --- a/xen/include/asm-x86/spinlock.h +++ b/xen/include/asm-x86/spinlock.h @@ -31,39 +31,67 @@ static always_inline int _raw_spin_trylo return (oldval > 0); } -typedef struct { +#if NR_CPUS <= 2048 + +typedef union { volatile int lock; + volatile int write; +} raw_rwlock_t; + +#define RW_LOCK_BIAS 0x00100000 +#define READ_LOCK_SUFFIX "l" +#define WRITE_LOCK_ADD(n) "addl %" #n "," +#define WRITE_LOCK_SUB(n) "subl %" #n "," +#define WRITE_LOCK_CMP(n) "%" #n + +#else + +typedef union { + volatile s64 lock; + struct { + volatile u32 read; + volatile s32 write; + }; } raw_rwlock_t; -#define RW_LOCK_BIAS 0x01000000 +#define RW_LOCK_BIAS (1L << 32) +#define READ_LOCK_SUFFIX "q" +#define WRITE_LOCK_ADD(n) "incl" +#define WRITE_LOCK_SUB(n) "decl" +#define WRITE_LOCK_CMP(n) "$1" + +#endif + #define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS } static always_inline void _raw_read_lock(raw_rwlock_t *rw) { asm volatile ( - "1: lock; decl %0 \n" - " jns 3f \n" - " lock; incl %0 \n" - "2: rep; nop \n" - " cmpl $1,%0 \n" - " js 2b \n" - " jmp 1b \n" - "3:" - : "=m" (rw->lock) : : "memory" ); + "1: lock; dec" READ_LOCK_SUFFIX " %0\n" + " js 3f \n" + " .subsection 1 \n" + "3: lock; inc" READ_LOCK_SUFFIX " %0\n" + "2: rep; nop \n" + " cmp" READ_LOCK_SUFFIX " $1,%0 \n" + " js 2b \n" + " jmp 1b \n" + " .subsection 0 \n" + : "+m" (rw->lock) : : "memory" ); } static always_inline void _raw_write_lock(raw_rwlock_t *rw) { asm volatile ( - "1: lock; subl %1,%0 \n" - " jz 3f \n" - " lock; addl %1,%0 \n" - "2: rep; nop \n" - " cmpl %1,%0 \n" - " jne 2b \n" - " jmp 1b \n" - "3:" - : "=m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory" ); + "1: lock; " WRITE_LOCK_SUB(1) " %0\n" + " jnz 3f \n" + " .subsection 1 \n" + "3: lock; " WRITE_LOCK_ADD(1) " %0\n" + "2: rep; nop \n" + " cmpl " WRITE_LOCK_CMP(1) ", %0\n" + " jne 2b \n" + " jmp 1b \n" + " .subsection 0 \n" + : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory" ); } static always_inline int _raw_write_trylock(raw_rwlock_t *rw) @@ -71,12 +99,12 @@ static always_inline int _raw_write_tryl int rc; asm volatile ( - " lock; subl %2,%0 \n" - " jz 1f \n" - " lock; addl %2,%0 \n" - " dec %1 \n" + " lock; " WRITE_LOCK_SUB(2) " %0\n" + " jz 1f \n" + " lock; " WRITE_LOCK_ADD(2) " %0\n" + " dec %1 \n" "1:" - : "=m" (rw->lock), "=r" (rc) : "i" (RW_LOCK_BIAS), "1" (1) + : "+m" (rw->write), "=r" (rc) : "i" (RW_LOCK_BIAS), "1" (1) : "memory" ); return rc; @@ -85,18 +113,24 @@ static always_inline int _raw_write_tryl static always_inline void _raw_read_unlock(raw_rwlock_t *rw) { asm volatile ( - "lock ; incl %0" - : "=m" ((rw)->lock) : : "memory" ); + " lock ; inc" READ_LOCK_SUFFIX " %0" + : "+m" (rw->lock) : : "memory" ); } static always_inline void _raw_write_unlock(raw_rwlock_t *rw) { asm volatile ( - "lock ; addl %1,%0" - : "=m" ((rw)->lock) : "i" (RW_LOCK_BIAS) : "memory" ); + " lock ; " WRITE_LOCK_ADD(1) " %0" + : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory" ); } #define _raw_rw_is_locked(x) ((x)->lock < RW_LOCK_BIAS) -#define _raw_rw_is_write_locked(x) ((x)->lock <= 0) +#define _raw_rw_is_write_locked(x) ((x)->write <= 0) + +#undef READ_LOCK_SUFFIX +#undef WRITE_LOCK_ADD +#undef WRITE_LOCK_SUB +#undef WRITE_LOCK_CMP + #endif /* __ASM_SPINLOCK_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel