This patch adds support to find the next 1 or 0 bit in an xbitmap range
and to clear a range of bits. More possible optimizations to add in the
future:
1) xb_set_bit_range: set a range of bits.
2) when searching a bit, if the bit is not found in the slot, move on to
   the next slot directly.
3) add tags to help searching.

Signed-off-by: Wei Wang <wei.w.wang at intel.com>
Cc: Matthew Wilcox <mawilcox at microsoft.com>
Cc: Andrew Morton <akpm at linux-foundation.org>
Cc: Michal Hocko <mhocko at kernel.org>
Cc: Michael S. Tsirkin <mst at redhat.com>
Cc: Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
Suggested-by: Matthew Wilcox <mawilcox at microsoft.com>
---
 include/linux/xbitmap.h      |   6 ++
 lib/xbitmap.c                | 205 +++++++++++++++++++++++++++++++++++++++++++
 tools/include/linux/bitmap.h |  34 +++++++
 tools/include/linux/kernel.h |   2 +
 4 files changed, 247 insertions(+)

v20 RESEND Changes:
- fixed the !node path
- added the test cases for the !node path
- changed __builtin_constant_p(start & 7) to __builtin_constant_p(start)

diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
index 108f929..ede1029 100644
--- a/include/linux/xbitmap.h
+++ b/include/linux/xbitmap.h
@@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
 int xb_set_bit(struct xb *xb, unsigned long bit);
 bool xb_test_bit(const struct xb *xb, unsigned long bit);
 void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+			unsigned long nbits);
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+			  unsigned long offset);
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+			   unsigned long offset);
 
 static inline bool xb_empty(const struct xb *xb)
 {
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index a1c0abd..1c99586 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -3,6 +3,16 @@
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 
+/*
+ * Developer notes:
+ * - locks are required to guarantee there are no concurrent
+ *   calls of xb_set_bit, xb_clear_bit, xb_clear_bit_range, xb_test_bit,
+ *   xb_find_set, or xb_find_clear operating on the same ida bitmap.
+ * - The current implementations of xb_clear_bit_range, xb_find_set and
+ *   xb_find_clear may cause long latency when the bit range to operate
+ *   on is very large (e.g. [0, ULONG_MAX]).
+ */
+
 /**
  * xb_set_bit - set a bit in the xbitmap
  * @xb: the xbitmap tree used to record the bit
@@ -72,6 +82,49 @@ void xb_clear_bit(struct xb *xb, unsigned long bit)
 EXPORT_SYMBOL(xb_clear_bit);
 
 /**
+ * xb_clear_bit_range - clear a range of bits in the xbitmap
+ * @start: the start of the bit range, inclusive
+ * @nbits: number of bits to clear
+ *
+ * This function is used to clear a range of bits in the xbitmap. If all the
+ * bits in the bitmap are 0, the bitmap will be freed.
+ */
+void xb_clear_bit_range(struct xb *xb, unsigned long start,
+			unsigned long nbits)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = start / IDA_BITMAP_BITS;
+	unsigned long bit = start % IDA_BITMAP_BITS;
+
+	if (nbits > ULONG_MAX - start)
+		nbits = ULONG_MAX - start;
+
+	while (nbits) {
+		unsigned int __nbits = min(nbits,
+					(unsigned long)IDA_BITMAP_BITS - bit);
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (bitmap) {
+			if (__nbits != IDA_BITMAP_BITS)
+				bitmap_clear(bitmap->bitmap, bit, __nbits);
+
+			if (__nbits == IDA_BITMAP_BITS ||
+			    bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+				kfree(bitmap);
+				__radix_tree_delete(root, node, slot);
+			}
+		}
+		bit = 0;
+		index++;
+		nbits -= __nbits;
+	}
+}
+EXPORT_SYMBOL(xb_clear_bit_range);
+
+/**
  * xb_test_bit - test a bit in the xbitmap
  * @xb: the xbitmap tree used to record the bit
  * @bit: index of the bit to test
@@ -94,6 +147,98 @@ bool xb_test_bit(const struct xb *xb, unsigned long bit)
 }
 EXPORT_SYMBOL(xb_test_bit);
 
+/**
+ * xb_find_set - find the next set bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit, or @size if no set bit is found.
+ */
+unsigned long xb_find_set(struct xb *xb, unsigned long size,
+			  unsigned long offset)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = offset / IDA_BITMAP_BITS;
+	unsigned long index_end = size / IDA_BITMAP_BITS;
+	unsigned long bit = offset % IDA_BITMAP_BITS;
+
+	if (unlikely(offset >= size))
+		return size;
+
+	while (index <= index_end) {
+		unsigned long ret;
+		unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+
+		if (!node && !bitmap)
+			return size;
+
+		if (bitmap) {
+			if (nbits > IDA_BITMAP_BITS)
+				nbits = IDA_BITMAP_BITS;
+
+			ret = find_next_bit(bitmap->bitmap, nbits, bit);
+			if (ret != nbits)
+				return ret + index * IDA_BITMAP_BITS;
+		}
+		bit = 0;
+		index++;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero - find the next zero bit in a range of bits
+ * @xb: the xbitmap to search from
+ * @offset: the offset in the range to start searching
+ * @size: the size of the range
+ *
+ * Returns: the found bit, or @size if no zero bit is found.
+ */
+unsigned long xb_find_zero(struct xb *xb, unsigned long size,
+			   unsigned long offset)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = offset / IDA_BITMAP_BITS;
+	unsigned long index_end = size / IDA_BITMAP_BITS;
+	unsigned long bit = offset % IDA_BITMAP_BITS;
+
+	if (unlikely(offset >= size))
+		return size;
+
+	while (index <= index_end) {
+		unsigned long ret;
+		unsigned int nbits = size - index * IDA_BITMAP_BITS;
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (bitmap) {
+			if (nbits > IDA_BITMAP_BITS)
+				nbits = IDA_BITMAP_BITS;
+
+			ret = find_next_zero_bit(bitmap->bitmap, nbits, bit);
+			if (ret != nbits)
+				return ret + index * IDA_BITMAP_BITS;
+		} else {
+			return bit + index * IDA_BITMAP_BITS;
+		}
+		bit = 0;
+		index++;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
 #ifndef __KERNEL__
 
 static DEFINE_XB(xb1);
@@ -111,6 +256,64 @@ void xbitmap_check_bit(unsigned long bit)
 	xb_preload_end();
 }
 
+static void xbitmap_check_bit_range(void)
+{
+	/* Regular test 1: node = NULL */
+	xb_preload(GFP_KERNEL);
+	xb_set_bit(&xb1, 700);
+	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, 0) == 700);
+	assert(xb_find_set(&xb1, ULONG_MAX, 800) == ULONG_MAX);
+	xb_clear_bit_range(&xb1, 0, 1024);
+
+	/*
+	 * Regular test 2
+	 * set bit 2000, 2001, 2040
+	 * Next 1 in [0, 2048)     --> 2000
+	 * Next 1 in [2000, 2002)  --> 2000
+	 * Next 1 in [2002, 2041)  --> 2040
+	 * Next 1 in [2002, 2040)  --> none
+	 * Next 0 in [2000, 2048)  --> 2002
+	 * Next 0 in [2048, 2060)  --> 2048
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 2000));
+	assert(!xb_set_bit(&xb1, 2001));
+	assert(!xb_set_bit(&xb1, 2040));
+	assert(xb_find_set(&xb1, 2048, 0) == 2000);
+	assert(xb_find_set(&xb1, 2002, 2000) == 2000);
+	assert(xb_find_set(&xb1, 2041, 2002) == 2040);
+	assert(xb_find_set(&xb1, 2040, 2002) == 2040);
+	assert(xb_find_zero(&xb1, 2048, 2000) == 2002);
+	assert(xb_find_zero(&xb1, 2060, 2048) == 2048);
+	xb_clear_bit_range(&xb1, 0, 2048);
+	assert(xb_find_set(&xb1, 2048, 0) == 2048);
+	xb_preload_end();
+
+	/*
+	 * Overflow tests:
+	 * Set bit 1 and ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 4, ULONG_MAX)     --> ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 3, ULONG_MAX + 4) --> none
+	 * Next 0 in [ULONG_MAX - 4, ULONG_MAX + 4) --> none
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 1));
+	xb_preload_end();
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+	assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 4) == ULONG_MAX - 4);
+	assert(xb_find_set(&xb1, ULONG_MAX + 4, ULONG_MAX - 3) ==
+	       ULONG_MAX + 4);
+	assert(xb_find_zero(&xb1, ULONG_MAX + 4, ULONG_MAX - 4) ==
+	       ULONG_MAX + 4);
+	xb_clear_bit_range(&xb1, ULONG_MAX - 4, 4);
+	assert(xb_find_set(&xb1, ULONG_MAX, ULONG_MAX - 10) == ULONG_MAX);
+	xb_clear_bit_range(&xb1, 0, 2);
+	assert(xb_find_set(&xb1, 2, 0) == 2);
+	xb_preload_end();
+}
+
 void xbitmap_checks(void)
 {
 	xb_init(&xb1);
@@ -122,6 +325,8 @@ void xbitmap_checks(void)
 	xbitmap_check_bit(1025);
 	xbitmap_check_bit((1UL << 63) | (1UL << 24));
 	xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+	xbitmap_check_bit_range();
 }
 
 int __weak main(void)
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca16027..6b559ee 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+				  int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+static __always_inline void bitmap_clear(unsigned long *map,
+					 unsigned int start,
+					 unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
 	unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad8844..3c992ae 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
 #define UINT_MAX	(~0U)
 #endif
 
+#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
-- 
2.7.4
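For context, here is a minimal usage sketch of the v20 interface above,
written in the style of the patch's own test cases (xb1 and the bit values
are illustrative, and error handling is elided as in those tests):

	static DEFINE_XB(xb1);

	static void xbitmap_usage_sketch(void)
	{
		/* xb_preload() provides the ida_bitmap that xb_set_bit()
		 * consumes; preemption stays disabled until
		 * xb_preload_end(). */
		xb_preload(GFP_KERNEL);
		assert(!xb_set_bit(&xb1, 2000));
		xb_preload_end();

		/* v20 convention: search [offset, size); the return value
		 * equals @size when no matching bit is found. */
		assert(xb_find_set(&xb1, 4096, 0) == 2000);
		assert(xb_find_zero(&xb1, 4096, 2000) == 2001);

		/* Clearing a range frees any ida_bitmap that becomes
		 * empty. */
		xb_clear_bit_range(&xb1, 0, 4096);
		assert(xb_find_set(&xb1, 4096, 0) == 4096);
	}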
First of all, the test-suite doesn't build, so I don't know whether you
ran it or not.  Then I added the xb_find_set() call below, and it fails
the assert, so you should probably fix that.

diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index f03a0f9f9e29..b29af08a7597 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -249,11 +249,12 @@ void xbitmap_check_bit(unsigned long bit)
 	assert(!xb_test_bit(&xb1, bit));
 	assert(xb_set_bit(&xb1, bit) == 0);
 	assert(xb_test_bit(&xb1, bit));
-	assert(xb_clear_bit(&xb1, bit) == 0);
+	xb_clear_bit(&xb1, bit);
 	assert(xb_empty(&xb1));
-	assert(xb_clear_bit(&xb1, bit) == 0);
+	xb_clear_bit(&xb1, bit);
 	assert(xb_empty(&xb1));
 	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, 0) == bit);
 }
 
 static void xbitmap_check_bit_range(void)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 34ece7883629..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \

On Thu, Dec 21, 2017 at 10:30:06AM +0800, Wei Wang wrote:
> v20 RESEND Changes:
> - fixed the !node path
> - added the test cases for the !node path
> - changed __builtin_constant_p(start & 7) to __builtin_constant_p(start)

Why would you do such a thing?  Just copy the kernel definitions.

> diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
> index 108f929..ede1029 100644
> --- a/include/linux/xbitmap.h
> +++ b/include/linux/xbitmap.h
> @@ -35,6 +35,12 @@ static inline void xb_init(struct xb *xb)
>  int xb_set_bit(struct xb *xb, unsigned long bit);
>  bool xb_test_bit(const struct xb *xb, unsigned long bit);
>  void xb_clear_bit(struct xb *xb, unsigned long bit);
> +void xb_clear_bit_range(struct xb *xb, unsigned long start,
> +			unsigned long nbits);

This is xb_zero().  I thought we talked about this before?

> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> +			  unsigned long offset);
> +unsigned long xb_find_zero(struct xb *xb, unsigned long size,
> +			   unsigned long offset);

Since you're using xb_find_zero(), I think we need the tags from the
IDR.  At that point, I'm not sure there's a point in keeping the
xbitmap and the IDR as separate data structures.

> +/**
> + * xb_find_set - find the next set bit in a range of bits
> + * @xb: the xbitmap to search from
> + * @offset: the offset in the range to start searching
> + * @size: the size of the range
> + *
> + * Returns: the found bit, or @size if no set bit is found.
> + */
> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
> +			  unsigned long offset)
> +{
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_node *node;
> +	void __rcu **slot;
> +	struct ida_bitmap *bitmap;
> +	unsigned long index = offset / IDA_BITMAP_BITS;
> +	unsigned long index_end = size / IDA_BITMAP_BITS;
> +	unsigned long bit = offset % IDA_BITMAP_BITS;
> +
> +	if (unlikely(offset >= size))
> +		return size;
> +
> +	while (index <= index_end) {
> +		unsigned long ret;
> +		unsigned int nbits = size - index * IDA_BITMAP_BITS;
> +
> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> +
> +		if (!node && !bitmap)
> +			return size;
> +
> +		if (bitmap) {
> +			if (nbits > IDA_BITMAP_BITS)
> +				nbits = IDA_BITMAP_BITS;
> +
> +			ret = find_next_bit(bitmap->bitmap, nbits, bit);
> +			if (ret != nbits)
> +				return ret + index * IDA_BITMAP_BITS;
> +		}
> +		bit = 0;
> +		index++;
> +	}
> +
> +	return size;
> +}
> +EXPORT_SYMBOL(xb_find_set);

This is going to be slower than the implementation I sent yesterday.
If I call:

	xb_init(xb);
	xb_set_bit(xb, ULONG_MAX);
	xb_find_set(xb, ULONG_MAX, 0);

it's going to call __radix_tree_lookup() 16 quadrillion times.
My implementation will walk the tree precisely once.
OK, here's a rewrite of xbitmap.

Compared to the version you sent:
 - xb_find_set() is the rewrite I sent out yesterday.
 - xb_find_clear() is a new implementation.  I use the IDR_FREE tag to
   find clear bits.  This led to me finding a bug in
   radix_tree_for_each_tagged().
 - xb_zero() is also a new implementation (replacing xb_clear_bit_range).
   It should also be more efficient in deep trees.
 - Did not accept your change to xb_set_bit(); I think it's important to
   leave the radix tree node in place, so we're guaranteed to make
   progress in low-memory situations.  We're expecting the caller to
   retry, so the memory should not leak.
 - Redid a lot of the kernel-doc.  Thanks for adding it!  I removed
   mentions of implementation details, leaving only information useful
   to those who are using it.

Compared to the version I put out back in February:
 - Accepted your change to xb_preload(); I think it's an improvement.
 - Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
   Should improve performance for deep trees.
 - Rewrote xb_clear_bit() for the same reason.
 - Left out the use of radix tree exceptional entries.  Thanks for
   taking them out!  Keeps it clearer for now; if they prove useful, we
   can put them back in.
 - Removed the use of __radix_tree_delete and __radix_tree_create, so
   drop the changes to those functions.

Other miscellaneous notes:
 - I left xb_fill() in the header file, even though there's no
   implementation yet.  Wouldn't be hard to add once we have a user.
 - Used SPDX tags instead of a license notice.

I think we need more test cases ... in reviewing this to send out,
I found a bug in xb_zero(), which I've only fixed because I don't have
time to write a test case for it.

diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
new file mode 100644
index 000000000000..c008309a9494
--- /dev/null
+++ b/include/linux/xbitmap.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * eXtensible Bitmaps
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox at microsoft.com>
+ *
+ * eXtensible Bitmaps provide an unlimited-size sparse bitmap facility.
+ * All bits are initially zero.
+ *
+ * Locking is to be provided by the user.  No xb_ function is safe to
+ * call concurrently with any other xb_ function.
+ */
+
+#include <linux/idr.h>
+
+struct xb {
+	struct radix_tree_root xbrt;
+};
+
+#define XB_INIT {						\
+	.xbrt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),	\
+}
+#define DEFINE_XB(name)		struct xb name = XB_INIT
+
+static inline void xb_init(struct xb *xb)
+{
+	INIT_RADIX_TREE(&xb->xbrt, IDR_RT_MARKER | GFP_NOWAIT);
+}
+
+int xb_set_bit(struct xb *xb, unsigned long bit);
+bool xb_test_bit(const struct xb *xb, unsigned long bit);
+void xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max);
+void xb_fill(struct xb *xb, unsigned long min, unsigned long max);
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit);
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit);
+
+static inline bool xb_empty(const struct xb *xb)
+{
+	return radix_tree_empty(&xb->xbrt);
+}
+
+int __must_check xb_preload(gfp_t);
+
+static inline void xb_preload_end(void)
+{
+	preempt_enable();
+}
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..08a8183c390b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o xbitmap.o int_sqrt.o extable.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c8d55565fafa..d2bd8feb7b85 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -37,7 +37,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-
+#include <linux/xbitmap.h>
 
 /* Number of nodes in fully populated tree of given height */
 static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
@@ -77,6 +77,11 @@ static struct kmem_cache *radix_tree_node_cachep;
 						RADIX_TREE_MAP_SHIFT))
 #define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)
 
+#define XB_INDEX_BITS		(BITS_PER_LONG - ilog2(IDA_BITMAP_BITS))
+#define XB_MAX_PATH		(DIV_ROUND_UP(XB_INDEX_BITS, \
+						RADIX_TREE_MAP_SHIFT))
+#define XB_PRELOAD_SIZE		(XB_MAX_PATH * 2 - 1)
+
 /*
  * Per-cpu pool of preloaded nodes
  */
@@ -1781,7 +1786,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 			child = rcu_dereference_raw(node->slots[offset]);
 		}
 
-		if (!child)
+		if (!child && !is_idr(root))
 			goto restart;
 		if (child == RADIX_TREE_RETRY)
 			break;
@@ -2135,6 +2140,35 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 }
 EXPORT_SYMBOL(ida_pre_get);
 
+/**
+ * xb_preload - preload for xb_set_bit()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to xb_set_bit().  On success,
+ * return zero, with preemption disabled.  On error, return -ENOMEM with
+ * preemption not disabled.
+ */
+int xb_preload(gfp_t gfp)
+{
+	if (!this_cpu_read(ida_bitmap)) {
+		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+
+		if (!bitmap)
+			return -ENOMEM;
+		/*
+		 * The per-CPU variable is updated with preemption enabled.
+		 * If the calling task is unlucky to be scheduled to another
+		 * CPU which has no ida_bitmap allocation, it will be detected
+		 * when setting a bit (i.e. xb_set_bit()).
+		 */
+		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
+		kfree(bitmap);
+	}
+
+	return __radix_tree_preload(gfp, XB_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(xb_preload);
+
 void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
 			      unsigned long max)
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
new file mode 100644
index 000000000000..d596ba247b45
--- /dev/null
+++ b/lib/xbitmap.c
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * XBitmap implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox at microsoft.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/xbitmap.h>
+
+/**
+ * xb_set_bit() - Set a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to set.
+ *
+ * This function is used to set a bit in the xbitmap.
+ *
+ * Return: 0 on success.  -ENOMEM if memory could not be allocated.
+ */
+int xb_set_bit(struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+
+	bit %= IDA_BITMAP_BITS;
+	radix_tree_iter_init(&iter, index);
+	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
+	if (IS_ERR(slot)) {
+		if (slot == ERR_PTR(-ENOSPC))
+			return 0;	/* Already set */
+		return -ENOMEM;
+	}
+	bitmap = rcu_dereference_raw(*slot);
+	if (!bitmap) {
+		bitmap = this_cpu_xchg(ida_bitmap, NULL);
+		if (!bitmap)
+			return -ENOMEM;
+		memset(bitmap, 0, sizeof(*bitmap));
+		radix_tree_iter_replace(root, &iter, slot, bitmap);
+	}
+
+	__set_bit(bit, bitmap->bitmap);
+	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
+	return 0;
+}
+EXPORT_SYMBOL(xb_set_bit);
+
+/**
+ * xb_clear_bit() - Clear a bit in the XBitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to clear.
+ *
+ * This function is used to clear a bit in the xbitmap.
+ */
+void xb_clear_bit(struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+
+	bit %= IDA_BITMAP_BITS;
+	slot = radix_tree_iter_lookup(root, &iter, index);
+	if (!slot)
+		return;
+	bitmap = radix_tree_deref_slot(slot);
+	if (!bitmap)
+		return;
+
+	radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+	__clear_bit(bit, bitmap->bitmap);
+	if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+		kfree(bitmap);
+		radix_tree_iter_delete(root, &iter, slot);
+	}
+}
+EXPORT_SYMBOL(xb_clear_bit);
+
+/**
+ * xb_zero() - Clear a range of bits in the XBitmap.
+ * @xb: The XBitmap.
+ * @min: The first bit to clear.
+ * @max: The last bit to clear.
+ *
+ * This function is used to clear a range of bits in the xbitmap.
+ */
+void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = min / IDA_BITMAP_BITS;
+	unsigned long first = min % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_slot(slot, root, &iter, index) {
+		unsigned long nbits = IDA_BITMAP_BITS;
+		if (index > maxindex)
+			break;
+		bitmap = radix_tree_deref_slot(slot);
+		if (!bitmap)
+			continue;
+		radix_tree_iter_tag_set(root, &iter, IDR_FREE);
+
+		if (!first && iter.index < maxindex)
+			goto delete;
+		if (iter.index == maxindex)
+			nbits = max % IDA_BITMAP_BITS + 1;
+		bitmap_clear(bitmap->bitmap, first, nbits);
+		first = 0;
+		if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS))
+			goto delete;
+		continue;
+delete:
+		kfree(bitmap);
+		radix_tree_iter_delete(root, &iter, slot);
+	}
+}
+EXPORT_SYMBOL(xb_zero);
+
+/**
+ * xb_test_bit() - Test a bit in the xbitmap.
+ * @xb: The XBitmap.
+ * @bit: Index of the bit to test.
+ *
+ * This function is used to test a bit in the xbitmap.
+ *
+ * Return: %true if the bit is set.
+ */
+bool xb_test_bit(const struct xb *xb, unsigned long bit)
+{
+	unsigned long index = bit / IDA_BITMAP_BITS;
+	struct ida_bitmap *bitmap = radix_tree_lookup(&xb->xbrt, index);
+
+	bit %= IDA_BITMAP_BITS;
+
+	if (!bitmap)
+		return false;
+	return test_bit(bit, bitmap->bitmap);
+}
+EXPORT_SYMBOL(xb_test_bit);
+
+/**
+ * xb_find_set() - Find the next set bit in a range of bits.
+ * @xb: The XBitmap.
+ * @max: The maximum position to search.
+ * @bit: The first bit to examine, and on exit, the found bit.
+ *
+ * On entry, @bit points to the index of the first bit to search.  On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit.  It will not be updated if this function returns %false.
+ *
+ * Return: %true if a set bit was found.
+ */
+bool xb_find_set(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	struct ida_bitmap *bitmap;
+	unsigned long index = *bit / IDA_BITMAP_BITS;
+	unsigned int first = *bit % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_slot(slot, &xb->xbrt, &iter, index) {
+		if (iter.index > maxindex)
+			break;
+		bitmap = radix_tree_deref_slot(slot);
+		if (bitmap) {
+			unsigned int nbits = IDA_BITMAP_BITS;
+			if (iter.index == maxindex)
+				nbits = max % IDA_BITMAP_BITS + 1;
+			first = find_next_bit(bitmap->bitmap, nbits, first);
+			if (first != nbits) {
+				*bit = first + iter.index * IDA_BITMAP_BITS;
+				return true;
+			}
+		}
+		first = 0;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(xb_find_set);
+
+/**
+ * xb_find_zero() - Find the next zero bit in a range of bits.
+ * @xb: The XBitmap.
+ * @max: The maximum index to search.
+ * @bit: Pointer to an index.
+ *
+ * On entry, @bit points to the index of the first bit to search.  On exit,
+ * if this function returns %true, @bit will be updated to the index of the
+ * first found bit.  It will not be updated if this function returns %false.
+ *
+ * Return: %true if a clear bit was found.
+ */
+bool xb_find_zero(const struct xb *xb, unsigned long max, unsigned long *bit)
+{
+	void __rcu **slot;
+	struct radix_tree_iter iter;
+	struct ida_bitmap *bitmap;
+	unsigned long index = *bit / IDA_BITMAP_BITS;
+	unsigned long first = *bit % IDA_BITMAP_BITS;
+	unsigned long maxindex = max / IDA_BITMAP_BITS;
+
+	radix_tree_for_each_tagged(slot, &xb->xbrt, &iter, index, IDR_FREE) {
+		unsigned int nbits = IDA_BITMAP_BITS;
+		if (iter.index > maxindex)
+			return false;
+		bitmap = radix_tree_deref_slot(slot);
+		if (!bitmap)
+			break;
+		if (iter.index == maxindex)
+			nbits = max % IDA_BITMAP_BITS + 1;
+		first = find_next_zero_bit(bitmap->bitmap, nbits, first);
+		if (first != nbits)
+			break;
+		first = 0;
+	}
+
+	*bit = first + iter.index * IDA_BITMAP_BITS;
+	return true;
+}
+EXPORT_SYMBOL(xb_find_zero);
+
+#ifndef __KERNEL__
+
+static DEFINE_XB(xb1);
+
+static void xbitmap_check_bit(unsigned long bit)
+{
+	unsigned long nbit = 0;
+
+	xb_preload(GFP_KERNEL);
+	assert(!xb_test_bit(&xb1, bit));
+	assert(xb_set_bit(&xb1, bit) == 0);
+	assert(xb_test_bit(&xb1, bit));
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == bit);
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == bit);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == bit + 1);
+	xb_clear_bit(&xb1, bit);
+	assert(xb_empty(&xb1));
+	xb_clear_bit(&xb1, bit);
+	assert(xb_empty(&xb1));
+	nbit = 0;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+}
+
+static void xbitmap_check_bit_range(void)
+{
+	unsigned long nbit = 0;
+
+	/* Regular test 1: node = NULL */
+	xb_preload(GFP_KERNEL);
+	xb_set_bit(&xb1, 700);
+	xb_preload_end();
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == 700);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == 701);
+	xb_zero(&xb1, 0, 1023);
+
+	/*
+	 * Regular test 2
+	 * set bit 2000, 2001, 2040
+	 * Next 1 in [0, 2048)     --> 2000
+	 * Next 1 in [2000, 2002)  --> 2000
+	 * Next 1 in [2002, 2041)  --> 2040
+	 * Next 1 in [2002, 2040)  --> none
+	 * Next 0 in [2000, 2048)  --> 2002
+	 * Next 0 in [2048, 2060)  --> 2048
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 2000));
+	assert(!xb_set_bit(&xb1, 2001));
+	assert(!xb_set_bit(&xb1, 2040));
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2048, &nbit) == true);
+	assert(nbit == 2000);
+	assert(xb_find_set(&xb1, 2002, &nbit) == true);
+	assert(nbit == 2000);
+	nbit = 2002;
+	assert(xb_find_set(&xb1, 2041, &nbit) == true);
+	assert(nbit == 2040);
+	nbit = 2002;
+	assert(xb_find_set(&xb1, 2040, &nbit) == true);
+	assert(nbit == 2040);
+	nbit = 2000;
+	assert(xb_find_zero(&xb1, 2048, &nbit) == true);
+	assert(nbit == 2002);
+	nbit = 2048;
+	assert(xb_find_zero(&xb1, 2060, &nbit) == true);
+	assert(nbit == 2048);
+	xb_zero(&xb1, 0, 2047);
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2048, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+
+	/*
+	 * Overflow tests:
+	 * Set bit 1 and ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 4, ULONG_MAX)     --> ULONG_MAX - 4
+	 * Next 1 in [ULONG_MAX - 3, ULONG_MAX + 4) --> none
+	 * Next 0 in [ULONG_MAX - 4, ULONG_MAX + 4) --> none
+	 */
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, 1));
+	xb_preload_end();
+	xb_preload(GFP_KERNEL);
+	assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
+	nbit = ULONG_MAX - 4;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == ULONG_MAX - 4);
+	nbit++;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == ULONG_MAX - 3);
+	nbit--;
+	assert(xb_find_zero(&xb1, ULONG_MAX, &nbit) == true);
+	assert(nbit == ULONG_MAX - 3);
+	xb_zero(&xb1, ULONG_MAX - 4, ULONG_MAX);
+	nbit = ULONG_MAX - 10;
+	assert(xb_find_set(&xb1, ULONG_MAX, &nbit) == false);
+	assert(nbit == ULONG_MAX - 10);
+	xb_zero(&xb1, 0, 1);
+	nbit = 0;
+	assert(xb_find_set(&xb1, 2, &nbit) == false);
+	assert(nbit == 0);
+	xb_preload_end();
+}
+
+/* Check that setting an already-full bitmap works */
+static void xbitmap_check_set(unsigned long base)
+{
+	unsigned long i;
+
+	assert(xb_empty(&xb1));
+
+	for (i = 0; i < 64 * 1024; i++) {
+		xb_preload(GFP_KERNEL);
+		assert(xb_set_bit(&xb1, base + i) == 0);
+		xb_preload_end();
+	}
+	for (i = 0; i < 64 * 1024; i++)
+		assert(xb_set_bit(&xb1, base + i) == 0);
+
+	for (i = 0; i < 64 * 1024; i++)
+		xb_clear_bit(&xb1, base + i);
+
+	assert(xb_empty(&xb1));
+}
+
+static void xbitmap_checks(void)
+{
+	xb_init(&xb1);
+	xbitmap_check_bit(0);
+	xbitmap_check_bit(30);
+	xbitmap_check_bit(31);
+	xbitmap_check_bit(62);
+	xbitmap_check_bit(63);
+	xbitmap_check_bit(64);
+	xbitmap_check_bit(700);
+	xbitmap_check_bit(1023);
+	xbitmap_check_bit(1024);
+	xbitmap_check_bit(1025);
+	xbitmap_check_bit((1UL << 63) | (1UL << 24));
+	xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+	xbitmap_check_bit_range();
+
+	xbitmap_check_set(0);
+	xbitmap_check_set(1024);
+	xbitmap_check_set(1024 * 64);
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	xbitmap_checks();
+}
+#endif
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca160270fdfa..6b559ee25def 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+				  int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+static __always_inline void bitmap_clear(unsigned long *map,
+					 unsigned int start,
+					 unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
 	unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..3c992ae15440 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@
 #define UINT_MAX	(~0U)
 #endif
 
+#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index fa7ee369b3c9..adf36e34dd77 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -fsanitize=address
-LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+LDFLAGS += -fsanitize=address $(LDLIBS)
+LDLIBS := -lpthread -lurcu
+TARGETS = main idr-test multiorder xbitmap
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
-	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
+	 xbitmap.o
 
 ifndef SHIFT
 SHIFT=3
@@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
 
 multiorder: multiorder.o $(CORE_OFILES)
 
+xbitmap: xbitmap.o $(CORE_OFILES)
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
+
 clean:
-	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+	$(RM) $(TARGETS) *.o radix-tree.c idr.c xbitmap.c generated/map-shift.h
 
 vpath %.c ../../lib
 
@@ -34,6 +38,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h \
+	../../../include/linux/xbitmap.h \
 	../../../include/linux/idr.h
 
 radix-tree.c: ../../../lib/radix-tree.c
@@ -42,6 +47,9 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
+xbitmap.c: ../../../lib/xbitmap.c
+	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
 .PHONY: mapshift
 
 mapshift:
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c3bc3f364f68..426f32f28547 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -17,6 +17,4 @@
 #define pr_debug printk
 #define pr_cont printk
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/xbitmap.h b/tools/testing/radix-tree/linux/xbitmap.h
new file mode 100644
index 000000000000..61de214542e7
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xbitmap.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/xbitmap.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 257f3f8aacaa..d112363262ae 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -326,6 +326,10 @@ static void single_thread_tests(bool long_run)
 	rcu_barrier();
 	printv(2, "after idr_checks: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
+	xbitmap_checks();
+	rcu_barrier();
+	printv(2, "after xbitmap_checks: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
 	big_gang_check(long_run);
 	rcu_barrier();
 	printv(2, "after big_gang_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index d9c031dbeb1a..8175d6b23b32 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -38,6 +38,7 @@ void benchmark(void);
 void idr_checks(void);
 void ida_checks(void);
 void ida_thread_tests(void);
+void xbitmap_checks(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
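For comparison with the v20 calling convention, here is a minimal sketch
of iterating over set bits with the cursor-style interface above (xb1 and
the printf are illustrative; the semantics follow the test cases in this
patch):

	static DEFINE_XB(xb1);

	static void walk_set_bits_sketch(void)
	{
		unsigned long bit = 0;

		/* xb_find_set() returns true and stores the found index
		 * in 'bit'; advancing the cursor past each hit turns the
		 * call into an iterator. */
		while (xb_find_set(&xb1, ULONG_MAX, &bit)) {
			printf("bit %lu is set\n", bit);
			if (bit == ULONG_MAX)	/* avoid wrapping the cursor */
				break;
			bit++;
		}
	}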
On 12/21/2017 10:37 PM, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
>>> +/**
>>> + * xb_find_set - find the next set bit in a range of bits
>>> + * @xb: the xbitmap to search from
>>> + * @offset: the offset in the range to start searching
>>> + * @size: the size of the range
>>> + *
>>> + * Returns: the found bit, or @size if no set bit is found.
>>> + */
>>> +unsigned long xb_find_set(struct xb *xb, unsigned long size,
>>> +			  unsigned long offset)
>>> +{
>>> +	struct radix_tree_root *root = &xb->xbrt;
>>> +	struct radix_tree_node *node;
>>> +	void __rcu **slot;
>>> +	struct ida_bitmap *bitmap;
>>> +	unsigned long index = offset / IDA_BITMAP_BITS;
>>> +	unsigned long index_end = size / IDA_BITMAP_BITS;
>>> +	unsigned long bit = offset % IDA_BITMAP_BITS;
>>> +
>>> +	if (unlikely(offset >= size))
>>> +		return size;
>>> +
>>> +	while (index <= index_end) {
>>> +		unsigned long ret;
>>> +		unsigned int nbits = size - index * IDA_BITMAP_BITS;
>>> +
>>> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
>>> +
>>> +		if (!node && !bitmap)
>>> +			return size;
>>> +
>>> +		if (bitmap) {
>>> +			if (nbits > IDA_BITMAP_BITS)
>>> +				nbits = IDA_BITMAP_BITS;
>>> +
>>> +			ret = find_next_bit(bitmap->bitmap, nbits, bit);
>>> +			if (ret != nbits)
>>> +				return ret + index * IDA_BITMAP_BITS;
>>> +		}
>>> +		bit = 0;
>>> +		index++;
>>> +	}
>>> +
>>> +	return size;
>>> +}
>>> +EXPORT_SYMBOL(xb_find_set);
>> This is going to be slower than the implementation I sent yesterday.
>> If I call:
>> 	xb_init(xb);
>> 	xb_set_bit(xb, ULONG_MAX);
>> 	xb_find_set(xb, ULONG_MAX, 0);
>>
>> it's going to call __radix_tree_lookup() 16 quadrillion times.
>> My implementation will walk the tree precisely once.
>>
> Yes. Wei's patch still cannot work.
> We should start reviewing Matthew's implementation.

It runs without any issue on my machine. I didn't generate an "xbitmap"
executable (I just found that adding the xbitmap executable causes a
build error due to a Makefile error); instead, I tested it within "main"
and it passed all the tests.

Matthew has implemented a new version, so let's start from there.

Best,
Wei
On 12/22/2017 05:03 AM, Matthew Wilcox wrote:
> OK, here's a rewrite of xbitmap.
>
> Compared to the version you sent:
> - xb_find_set() is the rewrite I sent out yesterday.
> - xb_find_clear() is a new implementation.  I use the IDR_FREE tag to
>   find clear bits.  This led to me finding a bug in
>   radix_tree_for_each_tagged().
> - xb_zero() is also a new implementation (replacing xb_clear_bit_range).
>   It should also be more efficient in deep trees.
> - Did not accept your change to xb_set_bit(); I think it's important to
>   leave the radix tree node in place, so we're guaranteed to make
>   progress in low-memory situations.  We're expecting the caller to
>   retry, so the memory should not leak.
> - Redid a lot of the kernel-doc.  Thanks for adding it!  I removed
>   mentions of implementation details, leaving only information useful
>   to those who are using it.
>
> Compared to the version I put out back in February:
> - Accepted your change to xb_preload(); I think it's an improvement.
> - Rewrote xb_set_bit() to use the radix_tree_iter_ family of functions.
>   Should improve performance for deep trees.
> - Rewrote xb_clear_bit() for the same reason.
> - Left out the use of radix tree exceptional entries.  Thanks for
>   taking them out!  Keeps it clearer for now; if they prove useful, we
>   can put them back in.
> - Removed the use of __radix_tree_delete and __radix_tree_create, so
>   drop the changes to those functions.
>
> Other miscellaneous notes:
> - I left xb_fill() in the header file, even though there's no
>   implementation yet.  Wouldn't be hard to add once we have a user.
> - Used SPDX tags instead of a license notice.
>
> I think we need more test cases ... in reviewing this to send out,
> I found a bug in xb_zero(), which I've only fixed because I don't have
> time to write a test case for it.

Thanks for the improvement. I also found a small bug in xb_zero.
With the following change, it passes the current test cases and has been
tested with the virtio-balloon usage without any issue.

> +/**
> + * xb_zero() - Clear a range of bits in the XBitmap.
> + * @xb: The XBitmap.
> + * @min: The first bit to clear.
> + * @max: The last bit to clear.
> + *
> + * This function is used to clear a range of bits in the xbitmap.
> + */
> +void xb_zero(struct xb *xb, unsigned long min, unsigned long max)
> +{
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
> +	struct ida_bitmap *bitmap;
> +	unsigned long index = min / IDA_BITMAP_BITS;
> +	unsigned long first = min % IDA_BITMAP_BITS;
> +	unsigned long maxindex = max / IDA_BITMAP_BITS;
> +
> +	radix_tree_for_each_slot(slot, root, &iter, index) {
> +		unsigned long nbits = IDA_BITMAP_BITS;
> +		if (index > maxindex)
> +			break;
> +		bitmap = radix_tree_deref_slot(slot);
> +		if (!bitmap)
> +			continue;
> +		radix_tree_iter_tag_set(root, &iter, IDR_FREE);
> +
> +		if (!first && iter.index < maxindex)
> +			goto delete;
> +		if (iter.index == maxindex)
> +			nbits = max % IDA_BITMAP_BITS + 1;
> +		bitmap_clear(bitmap->bitmap, first, nbits);

It should be:

	bitmap_clear(.., first, nbits - first);

> diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
> index fa7ee369b3c9..adf36e34dd77 100644
> --- a/tools/testing/radix-tree/Makefile
> +++ b/tools/testing/radix-tree/Makefile
> @@ -1,12 +1,13 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
>  CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
> -LDFLAGS += -fsanitize=address
> -LDLIBS+= -lpthread -lurcu
> -TARGETS = main idr-test multiorder
> +LDFLAGS += -fsanitize=address $(LDLIBS)
> +LDLIBS := -lpthread -lurcu
> +TARGETS = main idr-test multiorder xbitmap
>  CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
>  OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
> -	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
> +	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
> +	 xbitmap.o
>  
>  ifndef SHIFT
>  SHIFT=3
> @@ -25,8 +26,11 @@ idr-test: idr-test.o $(CORE_OFILES)
>  
>  multiorder: multiorder.o $(CORE_OFILES)
>  
> +xbitmap: xbitmap.o $(CORE_OFILES)
> +	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o xbitmap
> +

I think the "$(CC)..." line should be removed, which will fix the build
error when adding "xbitmap" to TARGETS.

Best,
Wei
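In other words (an untested sketch), the xbitmap target would link
through the same implicit rule the other targets already use:

	# No explicit $(CC) recipe: make's implicit link rule picks up
	# LDFLAGS/LDLIBS, exactly as it does for main and idr-test.
	xbitmap: xbitmap.o $(CORE_OFILES)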
On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > +	bit %= IDA_BITMAP_BITS;
> > +	radix_tree_iter_init(&iter, index);
> > +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > +	if (IS_ERR(slot)) {
> > +		if (slot == ERR_PTR(-ENOSPC))
> > +			return 0;	/* Already set */
>
> Why already set? I guess something is there, but is it guaranteed that
> there is a bitmap with the "bit" set?

Yes.  For radix trees tagged with IDR_RT_MARKER, newly created slots
have the IDR_FREE tag set.  We only clear the IDR_FREE tag once the
bitmap is full.  So if we try to find a free slot and the tag is clear,
we know the bitmap is full.

> > +	bitmap = rcu_dereference_raw(*slot);
> > +	if (!bitmap) {
> > +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
> > +		if (!bitmap)
> > +			return -ENOMEM;
>
> I can't understand this. I can understand if it were
>
> 	BUG_ON(!bitmap);
>
> because you called xb_preload().
>
> But
>
> 	/*
> 	 * Regular test 2
> 	 * set bit 2000, 2001, 2040
> 	 * Next 1 in [0, 2048)     --> 2000
> 	 * Next 1 in [2000, 2002)  --> 2000
> 	 * Next 1 in [2002, 2041)  --> 2040
> 	 * Next 1 in [2002, 2040)  --> none
> 	 * Next 0 in [2000, 2048)  --> 2002
> 	 * Next 0 in [2048, 2060)  --> 2048
> 	 */
> 	xb_preload(GFP_KERNEL);
> 	assert(!xb_set_bit(&xb1, 2000));
> 	assert(!xb_set_bit(&xb1, 2001));
> 	assert(!xb_set_bit(&xb1, 2040));
[...]
> 	xb_preload_end();
>
> you are not calling xb_preload() prior to each xb_set_bit() call.
> This means that, if each xb_set_bit() is not surrounded with
> xb_preload()/xb_preload_end(), there is a possibility of hitting
> this_cpu_xchg(ida_bitmap, NULL) == NULL.

This is just a lazy test.  We "know" that the bits in the range 1024-2047
will all land in the same bitmap, so there's no need to preload for each
of them.

> If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> and get rid of xb_preload()/xb_preload_end().

No, we can't.  GFP_NOWAIT | __GFP_NOWARN won't try very hard to allocate
memory.  There's no reason to fail the call if the user is in a context
where they can try harder to free memory.

> You are using idr_get_free_cmn(GFP_NOWAIT | __GFP_NOWARN), which
> means that the caller has to be prepared for allocation failure
> when calling xb_set_bit(). Thus, there is no need to use preload
> in order to avoid failing to allocate "bitmap".

xb_preload also preloads radix tree nodes.

> Also, please clarify why it is OK to just return here.
> I don't know what
>
> 	radix_tree_iter_replace(root, &iter, slot, bitmap);
>
> is doing. If you created a slot but did not assign "bitmap",
> what will the caller of xb_test_bit() etc. find? If there is an
> assumption about this slot, won't this cause a problem?

xb_test_bit will find NULL if bitmap wasn't assigned.  That doesn't
harm anything.
On Sat, Dec 23, 2017 at 11:33:45PM +0900, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
> > On Sat, Dec 23, 2017 at 11:59:54AM +0900, Tetsuo Handa wrote:
> > > Matthew Wilcox wrote:
> > > > +	bit %= IDA_BITMAP_BITS;
> > > > +	radix_tree_iter_init(&iter, index);
> > > > +	slot = idr_get_free_cmn(root, &iter, GFP_NOWAIT | __GFP_NOWARN, index);
> > > > +	if (IS_ERR(slot)) {
> > > > +		if (slot == ERR_PTR(-ENOSPC))
> > > > +			return 0;	/* Already set */
> > >
> > > Why already set? I guess something is there, but is it guaranteed that
> > > there is a bitmap with the "bit" set?
> >
> > Yes.  For radix trees tagged with IDR_RT_MARKER, newly created slots
> > have the IDR_FREE tag set.  We only clear the IDR_FREE tag once the
> > bitmap is full.  So if we try to find a free slot and the tag is clear,
> > we know the bitmap is full.
>
> OK. But does using the IDR_FREE tag have more benefit than cost?
> You are doing
>
> 	if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
> 		radix_tree_iter_tag_clear(root, &iter, IDR_FREE);
>
> for each xb_set_bit() call. How likely is it that we hit the
> ERR_PTR(-ENOSPC) path? Isn't removing both bitmap_full() and
> ERR_PTR(-ENOSPC) better?

You're assuming that the purpose of using IDR_FREE is to save xb_set_bit
from walking the tree unnecessarily.  It isn't; that's just a happy
side-effect.  Its main purpose is to make xb_find_zero() efficient.
If we have large ranges of set bits, xb_find_zero() will be able to
skip them.

> > This is just a lazy test.  We "know" that the bits in the range
> > 1024-2047 will all land in the same bitmap, so there's no need to
> > preload for each of them.
>
> Testcases also serve as examples of how to use the API.
> Assuming such a thing leads to incorrect usage.

Sure.  Would you like to submit a patch?

> > > If bitmap == NULL at this_cpu_xchg(ida_bitmap, NULL) is allowed,
> > > you can use kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN)
> > > and get rid of xb_preload()/xb_preload_end().
> >
> > No, we can't.  GFP_NOWAIT | __GFP_NOWARN won't try very hard to
> > allocate memory.  There's no reason to fail the call if the user is
> > in a context where they can try harder to free memory.
>
> But there is no reason to use GFP_NOWAIT at idr_get_free_cmn() if it is
> safe to use GFP_KERNEL. If we don't require xb_preload(), which forces
> idr_get_free_cmn() to use GFP_NOWAIT due to the possibility of
> preemption being disabled by xb_preload(), we can allow passing gfp
> flags to xb_set_bit().

The assumption is that the user has done:

	xb_preload(GFP_KERNEL);
	spin_lock(my_lock);
	xb_set_bit(xb, bit);
	spin_unlock(my_lock);
	xb_preload_end();

This is not the world's greatest interface.  Once I have the XArray
finished, we'll be able to ditch the external spinlock and the preload
interface and be able to call:

	xb_set_bit(xb, bit, GFP_KERNEL);

> > xb_preload also preloads radix tree nodes.
>
> But it still forces idr_get_free_cmn() to use GFP_NOWAIT, doesn't it?

I think you don't understand how the radix tree allocates nodes.
Preloading means that it will be able to access the nodes which were
allocated earlier.

> Speaking of the initial user (i.e. virtio-balloon), xb_preload() won't
> be able to use GFP_KERNEL in order to avoid OOM lockup. Therefore, I
> don't see an advantage in using xb_preload(). If xb_set_bit() receives
> gfp flags, the caller can pass GFP_KERNEL if it is safe to use
> GFP_KERNEL.

I haven't reviewed how virtio-balloon is using the interfaces.
On 12/23/2017 10:33 PM, Tetsuo Handa wrote:
>>>> +	bitmap = rcu_dereference_raw(*slot);
>>>> +	if (!bitmap) {
>>>> +		bitmap = this_cpu_xchg(ida_bitmap, NULL);
>>>> +		if (!bitmap)
>>>> +			return -ENOMEM;
>>> I can't understand this. I can understand if it were
>>>
>>> 	BUG_ON(!bitmap);
>>>
>>> because you called xb_preload().
>>>
>>> But
>>>
>>> 	/*
>>> 	 * Regular test 2
>>> 	 * set bit 2000, 2001, 2040
>>> 	 * Next 1 in [0, 2048)     --> 2000
>>> 	 * Next 1 in [2000, 2002)  --> 2000
>>> 	 * Next 1 in [2002, 2041)  --> 2040
>>> 	 * Next 1 in [2002, 2040)  --> none
>>> 	 * Next 0 in [2000, 2048)  --> 2002
>>> 	 * Next 0 in [2048, 2060)  --> 2048
>>> 	 */
>>> 	xb_preload(GFP_KERNEL);
>>> 	assert(!xb_set_bit(&xb1, 2000));
>>> 	assert(!xb_set_bit(&xb1, 2001));
>>> 	assert(!xb_set_bit(&xb1, 2040));
>> [...]
>>> 	xb_preload_end();
>>>
>>> you are not calling xb_preload() prior to each xb_set_bit() call.
>>> This means that, if each xb_set_bit() is not surrounded with
>>> xb_preload()/xb_preload_end(), there is a possibility of hitting
>>> this_cpu_xchg(ida_bitmap, NULL) == NULL.
>> This is just a lazy test. We "know" that the bits in the range 1024-2047
>> will all land in the same bitmap, so there's no need to preload for each
>> of them.
> Testcases also serve as examples of how to use the API.
> Assuming such a thing leads to incorrect usage.

If callers are aware that the bits they are going to record fall in the
same bitmap, I think they can also perform the xb_ API calls with only
one preload.  So the test cases here show them a correct example.  We can
probably add some comments above to explain this.

Best,
Wei
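A sketch of that convention, lifted from the test cases quoted above
(this assumes IDA_BITMAP_BITS is 1024, its usual value on 64-bit builds,
so bits 2000-2040 share one ida_bitmap, while bits in different chunks
each need their own preload):

	/* One preload covers several xb_set_bit() calls landing in the
	 * same IDA_BITMAP_BITS chunk, [1024, 2048) here. */
	xb_preload(GFP_KERNEL);
	assert(!xb_set_bit(&xb1, 2000));
	assert(!xb_set_bit(&xb1, 2001));
	assert(!xb_set_bit(&xb1, 2040));
	xb_preload_end();

	/* Bits in different chunks: preload around each call. */
	xb_preload(GFP_KERNEL);
	assert(!xb_set_bit(&xb1, 1));
	xb_preload_end();
	xb_preload(GFP_KERNEL);
	assert(!xb_set_bit(&xb1, ULONG_MAX - 4));
	xb_preload_end();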