Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 0/4] common/allocators: Always align mlock buffer
An earlier version of this patch was reviewed here:
https://listman.redhat.com/archives/libguestfs/2022-January/msg00172.html

For this series I pulled two smaller changes out into separate commits (patches 1 & 2).  I then decided to modify the vector library to add a new <vector>_reserve_page_aligned function which always allocates page-aligned memory (patch 3).  Currently this uses posix_memalign; in future it might use mmap instead, since there seems to be some dispute about whether calling mlock on anonymous memory is safe.

This makes the change to common/allocators/malloc.c quite straightforward (patch 4), at the cost of making the change to the vector library rather headache-inducing.  At least it is confined to a new function and doesn't affect any existing callers!

Rich.
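To illustrate how a caller is expected to use the new function, here is a minimal sketch (it assumes this series is applied and uses nbdkit's DEFINE_VECTOR_TYPE macro; the byte_vector type and grow_for_mlock function are made-up names for the example, not part of the series):

  #include <stdint.h>

  #include "vector.h"

  /* Hypothetical element type, defined only for this example. */
  DEFINE_VECTOR_TYPE (byte_vector, uint8_t);

  static int
  grow_for_mlock (byte_vector *v, size_t more_items)
  {
    /* Behaves like byte_vector_reserve, but the reallocated buffer
     * starts on a page boundary and its capacity is rounded up to a
     * whole number of pages, so a later mlock (v->ptr, v->cap) covers
     * exact pages only.
     */
    return byte_vector_reserve_page_aligned (v, more_items);
  }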
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 1/4] common/allocators/malloc.c: Avoid calling munlock (NULL, ...)
I'm not certain whether it is ever wrong to call munlock(2) with a NULL first parameter (and a zero second parameter).  Linux permits it; POSIX doesn't say either way.  In any case it's not necessary to do it, so add an extra check to avoid it here.
---
 common/allocators/malloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/allocators/malloc.c b/common/allocators/malloc.c
index eea44432..9a2dd549 100644
--- a/common/allocators/malloc.c
+++ b/common/allocators/malloc.c
@@ -96,7 +96,7 @@ extend (struct m_alloc *ma, uint64_t new_size)
     /* Since the memory might be moved by realloc, we must unlock the
      * original array.
      */
-    if (ma->use_mlock)
+    if (ma->use_mlock && ma->ba.ptr != NULL)
       munlock (ma->ba.ptr, ma->ba.cap);
 #endif
 
-- 
2.32.0
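For context, the guard sits in the unlock / realloc / relock sequence used by the malloc allocator.  A simplified sketch of that pattern (not the actual nbdkit code; names are illustrative only):

  #include <stdlib.h>
  #include <sys/mman.h>

  static int
  extend_buffer (char **ptr, size_t *cap, size_t newcap, int use_mlock)
  {
    char *newptr;

    /* Avoid munlock (NULL, 0): on the very first extension the
     * buffer has not been allocated yet, so *ptr is still NULL.
     */
    if (use_mlock && *ptr != NULL)
      munlock (*ptr, *cap);

    newptr = realloc (*ptr, newcap);   /* realloc may move the buffer */
    if (newptr == NULL)
      return -1;
    *ptr = newptr;
    *cap = newcap;

    if (use_mlock && mlock (*ptr, *cap) == -1)
      return -1;
    return 0;
  }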
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 2/4] tests: Use a larger buffer for mlock test
This test sometimes fails on machines with 64K page sizes (see the following commits), and by using a larger buffer size we can make that failure a little more likely to show up.  I increased the buffer size to 10K so that on machines with a 4K page size we deliberately allocate a partial page, which should be rounded up by the implementation.
---
 tests/test-memory-allocator-malloc-mlock.sh | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/test-memory-allocator-malloc-mlock.sh b/tests/test-memory-allocator-malloc-mlock.sh
index 4ef92db6..078cacab 100755
--- a/tests/test-memory-allocator-malloc-mlock.sh
+++ b/tests/test-memory-allocator-malloc-mlock.sh
@@ -46,9 +46,9 @@ if ! nbdkit memory --dump-plugin | grep -sq mlock=yes; then
 fi
 
 # ulimit -l is measured in kilobytes and so for this test must be at
-# least 2 (kilobytes) and we actually check it's a bit larger to allow
-# room for error.  On Linux the default is usually 64.
-requires test `ulimit -l` -gt 8
+# least 10 (kilobytes) and we actually check it's a bit larger to
+# allow room for error.  On Linux the default is usually 64.
+requires test `ulimit -l` -gt 16
 
 sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
 files="memory-allocator-malloc-mlock.pid $sock"
@@ -57,17 +57,21 @@ cleanup_fn rm -f $files
 
 # Run nbdkit with memory plugin.
 start_nbdkit -P memory-allocator-malloc-mlock.pid -U $sock \
-             memory 2048 allocator=malloc,mlock=true
+             memory 10240 allocator=malloc,mlock=true
 
 nbdsh --connect "nbd+unix://?socket=$sock" \
       -c '
-# Write some stuff to the beginning, middle and end.
+# Write some stuff.
 buf1 = b"1" * 512
 h.pwrite(buf1, 0)
 buf2 = b"2" * 512
 h.pwrite(buf2, 1024)
 buf3 = b"3" * 512
 h.pwrite(buf3, 1536)
+buf4 = b"4" * 512
+h.pwrite(buf4, 4096)
+buf5 = b"5" * 1024
+h.pwrite(buf5, 10240-len(buf5))
 
 # Read it back.
 buf11 = h.pread(len(buf1), 0)
@@ -76,4 +80,8 @@ buf22 = h.pread(len(buf2), 1024)
 assert buf2 == buf22
 buf33 = h.pread(len(buf3), 1536)
 assert buf3 == buf33
+buf44 = h.pread(len(buf4), 4096)
+assert buf4 == buf44
+buf55 = h.pread(len(buf5), 10240-len(buf5))
+assert buf5 == buf55
 '
-- 
2.32.0
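To see why 10K exercises the rounding path, a quick sketch of the round-up arithmetic (my own example, not part of the test; it assumes 4K and 64K page sizes):

  #include <stdio.h>

  /* Round a byte count up to a whole number of pages. */
  static unsigned long
  round_up_to_page (unsigned long bytes, unsigned long pagesize)
  {
    return (bytes + pagesize - 1) & ~(pagesize - 1);
  }

  int
  main (void)
  {
    printf ("%lu\n", round_up_to_page (10240, 4096));   /* 12288: a partial third page */
    printf ("%lu\n", round_up_to_page (10240, 65536));  /* 65536: one whole 64K page */
    return 0;
  }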
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 3/4] common/utils: Add vector_reserve_page_aligned
Allow vectors to be page aligned (eg. for performance or page protection reasons).  To create a page aligned vector you have to use <vector>_reserve_page_aligned instead of plain <vector>_reserve.

Note that <vector>_duplicate creates unaligned duplicates.
---
 configure.ac          |  4 ++-
 common/utils/vector.h | 12 +++++++
 common/utils/vector.c | 82 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/configure.ac b/configure.ac
index c426aec8..1ab85e3e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -384,7 +384,9 @@ AC_CHECK_FUNCS([\
         pipe \
         pipe2 \
         ppoll \
-        posix_fadvise])
+        posix_fadvise \
+        posix_memalign \
+        valloc])
 
 dnl Check for structs and members.
 AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [[#include <dirent.h>]])
diff --git a/common/utils/vector.h b/common/utils/vector.h
index 1d04f812..50dd895d 100644
--- a/common/utils/vector.h
+++ b/common/utils/vector.h
@@ -101,6 +101,15 @@
                                     sizeof (type));                    \
   }                                                                    \
                                                                        \
+  /* Same as _reserve, but the allocation will be page aligned.        \
+   */                                                                  \
+  static inline int                                                    \
+  name##_reserve_page_aligned (name *v, size_t n)                      \
+  {                                                                    \
+    return generic_vector_reserve_page_aligned ((struct generic_vector *)v, \
+                                                n, sizeof (type));     \
+  }                                                                    \
+                                                                       \
   /* Insert at i'th element.  i=0 => beginning  i=len => append */     \
   static inline int                                                    \
   name##_insert (name *v, type elem, size_t i)                         \
@@ -197,4 +206,7 @@ struct generic_vector {
 extern int generic_vector_reserve (struct generic_vector *v,
                                    size_t n, size_t itemsize);
 
+extern int generic_vector_reserve_page_aligned (struct generic_vector *v,
+                                                size_t n, size_t itemsize);
+
 #endif /* NBDKIT_VECTOR_H */
diff --git a/common/utils/vector.c b/common/utils/vector.c
index 54e6b810..8f0c33b1 100644
--- a/common/utils/vector.c
+++ b/common/utils/vector.c
@@ -34,15 +34,17 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
+#include <assert.h>
 
 #include "checked-overflow.h"
 #include "vector.h"
 
-int
-generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+static int
+calculate_capacity (struct generic_vector *v, size_t n, size_t itemsize,
+                    size_t *newcap_r, size_t *newbytes_r)
 {
-  void *newptr;
   size_t reqcap, reqbytes, newcap, newbytes, t;
 
   /* New capacity requested.  We must allocate this minimum (or fail).
@@ -71,6 +73,20 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
     newbytes = reqbytes;
   }
 
+  *newcap_r = newcap;
+  *newbytes_r = newbytes;
+  return 0;
+}
+
+int
+generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+{
+  void *newptr;
+  size_t newcap, newbytes;
+
+  if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+    return -1;
+
   newptr = realloc (v->ptr, newbytes);
   if (newptr == NULL)
     return -1;
@@ -79,3 +95,63 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
   v->cap = newcap;
   return 0;
 }
+
+/* Always allocates a buffer which is page aligned (both base and size). */
+int
+generic_vector_reserve_page_aligned (struct generic_vector *v,
+                                     size_t n, size_t itemsize)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+  int r;
+#endif
+  void *newptr;
+  size_t newcap, newbytes;
+  long pagesize;
+  size_t extra, extra_items;
+
+  pagesize = sysconf (_SC_PAGE_SIZE);
+  assert (pagesize > 0);
+
+  if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+    return -1;
+
+  /* If the new size (in bytes) is not a full page then we have to
+   * round up to the next page.  Note we make the implicit assumption
+   * here that itemsize is divisible by pagesize, but I assume you
+   * know what you're doing if you're using this function.
+   */
+  extra = newbytes & (pagesize - 1);
+  if (extra > 0) {
+    extra_items = (pagesize - extra) / itemsize;
+
+    if (ADD_OVERFLOW (newcap, extra_items, &newcap) ||
+        ADD_OVERFLOW (newbytes, extra_items * itemsize, &newbytes)) {
+      errno = ENOMEM;
+      return -1;
+    }
+  }
+
+#ifdef HAVE_POSIX_MEMALIGN
+  if ((r = posix_memalign (&newptr, pagesize, newbytes)) != 0) {
+    errno = r;
+    return -1;
+  }
+#elif HAVE_VALLOC
+  newptr = valloc (newbytes);
+  if (newptr == NULL)
+    return -1;
+#else
+#error "this platform does not have posix_memalign or valloc"
+#endif
+
+  /* NB: reserve always makes the buffer larger so we don't have to
+   * deal with the case of a shrinking buffer here.  Also reserve
+   * (like the underlying realloc) does not zero the reserved
+   * elements.  So we're just copying over the old elements here.
+   */
+  memcpy (newptr, v->ptr, v->len * itemsize);
+  free (v->ptr);
+  v->ptr = newptr;
+  v->cap = newcap;
+  return 0;
+}
-- 
2.32.0
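To make the rounding step in generic_vector_reserve_page_aligned concrete, here is a small standalone sketch of the same calculation with example numbers (the values are mine, not from the patch; it assumes a 4K page size and an itemsize that divides the page size, as the comment above requires):

  #include <stdio.h>
  #include <stddef.h>

  int
  main (void)
  {
    size_t itemsize = 8;                  /* e.g. a vector of uint64_t */
    size_t pagesize = 4096;
    size_t newcap = 1000;                 /* capacity chosen by calculate_capacity */
    size_t newbytes = newcap * itemsize;  /* 8000 bytes: not a whole page */

    size_t extra = newbytes & (pagesize - 1);              /* 8000 % 4096 = 3904 */
    if (extra > 0) {
      size_t extra_items = (pagesize - extra) / itemsize;  /* 192 / 8 = 24 */
      newcap += extra_items;                               /* 1024 items */
      newbytes += extra_items * itemsize;                  /* 8192 = two whole pages */
    }

    printf ("cap=%zu bytes=%zu\n", newcap, newbytes);
    return 0;
  }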
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 4/4] common/allocators: Always align mlock buffer
On platforms with 64K page sizes (RHEL + aarch64 or ppc64le) we have hit a problem in test-memory-allocator-malloc-mlock.sh which appears to be caused by:

1. The test requests a 2K mlocked buffer.

2. The allocated buffer sometimes straddles a page boundary.

3. Linux expands the mlock request to two pages, i.e. the requested size becomes 2 * 64K = 128K.

4. The standard mlock limit on Linux is 64K so the mlock request is rejected and the test fails.

Also POSIX permits the implementation to require that mlock parameters are page aligned (although Linux does not require this).

This commit attempts to fix both problems by ensuring the mlock request is always page aligned, using the new vector_reserve_page_aligned function.

Thanks: Laszlo Ersek
---
 common/allocators/malloc.c | 53 ++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/common/allocators/malloc.c b/common/allocators/malloc.c
index 9a2dd549..19df666a 100644
--- a/common/allocators/malloc.c
+++ b/common/allocators/malloc.c
@@ -81,11 +81,32 @@ m_alloc_free (struct allocator *a)
   }
 }
 
-/* Extend the underlying bytearray if needed.  mlock if requested. */
+/* Extend the underlying bytearray if needed. */
 static int
-extend (struct m_alloc *ma, uint64_t new_size)
+extend_without_mlock (struct m_alloc *ma, uint64_t new_size)
+{
+  size_t old_size, n;
+
+  if (ma->ba.cap < new_size) {
+    old_size = ma->ba.cap;
+    n = new_size - ma->ba.cap;
+
+    if (bytearray_reserve (&ma->ba, n) == -1) {
+      nbdkit_error ("realloc: %m");
+      return -1;
+    }
+
+    /* Initialize the newly allocated memory to 0. */
+    memset (ma->ba.ptr + old_size, 0, n);
+  }
+
+  return 0;
+}
+
+#ifdef HAVE_MLOCK
+static int
+extend_with_mlock (struct m_alloc *ma, uint64_t new_size)
 {
-  ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
   size_t old_size, n;
 
   if (ma->ba.cap < new_size) {
@@ -100,7 +121,7 @@ extend (struct m_alloc *ma, uint64_t new_size)
       munlock (ma->ba.ptr, ma->ba.cap);
 #endif
 
-    if (bytearray_reserve (&ma->ba, n) == -1) {
+    if (bytearray_reserve_page_aligned (&ma->ba, n) == -1) {
       nbdkit_error ("realloc: %m");
       return -1;
     }
@@ -108,18 +129,28 @@ extend (struct m_alloc *ma, uint64_t new_size)
     /* Initialize the newly allocated memory to 0. */
     memset (ma->ba.ptr + old_size, 0, n);
 
-#ifdef HAVE_MLOCK
-    if (ma->use_mlock) {
-      if (mlock (ma->ba.ptr, ma->ba.cap) == -1) {
-        nbdkit_error ("allocator=malloc: mlock: %m");
-        return -1;
-      }
+    if (mlock (ma->ba.ptr, ma->ba.cap) == -1) {
+      nbdkit_error ("allocator=malloc: mlock: %m");
+      return -1;
     }
-#endif
   }
 
   return 0;
 }
+#endif /* HAVE_MLOCK */
+
+static int
+extend (struct m_alloc *ma, uint64_t new_size)
+{
+  ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
+
+#ifdef HAVE_MLOCK
+  if (ma->use_mlock)
+    return extend_with_mlock (ma, new_size);
+#endif
+
+  return extend_without_mlock (ma, new_size);
+}
 
 static int
 m_alloc_set_size_hint (struct allocator *a, uint64_t size_hint)
-- 
2.32.0
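A rough sketch of the arithmetic behind steps 2-4 (not nbdkit code; the addresses and the 64K page size are made up for illustration):

  #include <stdio.h>

  /* Bytes that mlock() effectively locks for a buffer at `addr' of
   * `len' bytes: the kernel rounds the range out to whole pages.
   */
  static unsigned long
  locked_bytes (unsigned long addr, unsigned long len, unsigned long pagesize)
  {
    unsigned long start = addr & ~(pagesize - 1);
    unsigned long end = (addr + len + pagesize - 1) & ~(pagesize - 1);
    return end - start;
  }

  int
  main (void)
  {
    /* 2K buffer that fits inside a single 64K page: locks 64K. */
    printf ("%lu\n", locked_bytes (0x10000, 2048, 65536));

    /* 2K buffer straddling a 64K page boundary: locks 128K, which is
     * over the usual 64K RLIMIT_MEMLOCK default, so mlock fails.
     */
    printf ("%lu\n", locked_bytes (0x1fc00, 2048, 65536));
    return 0;
  }

Page-aligning both the base and the size of the allocation (patch 3) removes the straddling case entirely.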