Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 0/4] common/allocators: Always align mlock buffer
An earlier version of this patch was reviewed here:
https://listman.redhat.com/archives/libguestfs/2022-January/msg00172.html

For this series I pulled two smaller changes out into separate commits (patches 1 & 2).  I then decided to modify the vector library to add a new <vector>_reserve_page_aligned function which always allocates page-aligned memory (patch 3).  Currently this uses posix_memalign; in future it might use mmap instead, since there seems to be some dispute about whether calling mlock on anonymous memory is safe.

This makes the change to common/allocators/malloc.c quite straightforward (patch 4), at the cost of making the change to the vector library rather headache-inducing.  At least it is confined to a new function and doesn't affect any existing callers!

Rich.
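To illustrate how a caller is expected to use the new function, here is a minimal sketch (it assumes this series is applied and uses nbdkit's DEFINE_VECTOR_TYPE macro; the byte_vector type and grow_for_mlock function are made-up names for the example, not part of the series):

  #include <stdint.h>

  #include "vector.h"

  /* Hypothetical element type, defined only for this example. */
  DEFINE_VECTOR_TYPE (byte_vector, uint8_t);

  static int
  grow_for_mlock (byte_vector *v, size_t more_items)
  {
    /* Behaves like byte_vector_reserve, but the reallocated buffer
     * starts on a page boundary and its capacity is rounded up to a
     * whole number of pages, so a later mlock (v->ptr, v->cap) covers
     * exact pages only.
     */
    return byte_vector_reserve_page_aligned (v, more_items);
  }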
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 1/4] common/allocators/malloc.c: Avoid calling munlock (NULL, ...)
I'm not certain whether it is ever wrong to call munlock(2) with a NULL first parameter (and a zero second parameter).  Linux permits it; POSIX doesn't say either way.  In any case it's not necessary to do it, so add an extra check to avoid it here.
---
 common/allocators/malloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/allocators/malloc.c b/common/allocators/malloc.c
index eea44432..9a2dd549 100644
--- a/common/allocators/malloc.c
+++ b/common/allocators/malloc.c
@@ -96,7 +96,7 @@ extend (struct m_alloc *ma, uint64_t new_size)
     /* Since the memory might be moved by realloc, we must unlock the
      * original array.
      */
-    if (ma->use_mlock)
+    if (ma->use_mlock && ma->ba.ptr != NULL)
       munlock (ma->ba.ptr, ma->ba.cap);
 #endif
 
-- 
2.32.0
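For context, the guard sits in the unlock / realloc / relock sequence used by the malloc allocator.  A simplified sketch of that pattern (not the actual nbdkit code; names are illustrative only):

  #include <stdlib.h>
  #include <sys/mman.h>

  static int
  extend_buffer (char **ptr, size_t *cap, size_t newcap, int use_mlock)
  {
    char *newptr;

    /* Avoid munlock (NULL, 0): on the very first extension the
     * buffer has not been allocated yet, so *ptr is still NULL.
     */
    if (use_mlock && *ptr != NULL)
      munlock (*ptr, *cap);

    newptr = realloc (*ptr, newcap);   /* realloc may move the buffer */
    if (newptr == NULL)
      return -1;
    *ptr = newptr;
    *cap = newcap;

    if (use_mlock && mlock (*ptr, *cap) == -1)
      return -1;
    return 0;
  }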
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 2/4] tests: Use a larger buffer for mlock test
This test sometimes fails on machines with 64K page sizes (see the following commits), and by using a larger buffer size we can make that failure a little more likely to show up.  I increased the buffer size to 10K so that on machines with a 4K page size we deliberately allocate a partial page, which should be rounded up by the implementation.
---
 tests/test-memory-allocator-malloc-mlock.sh | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/test-memory-allocator-malloc-mlock.sh b/tests/test-memory-allocator-malloc-mlock.sh
index 4ef92db6..078cacab 100755
--- a/tests/test-memory-allocator-malloc-mlock.sh
+++ b/tests/test-memory-allocator-malloc-mlock.sh
@@ -46,9 +46,9 @@ if ! nbdkit memory --dump-plugin | grep -sq mlock=yes; then
 fi
 
 # ulimit -l is measured in kilobytes and so for this test must be at
-# least 2 (kilobytes) and we actually check it's a bit larger to allow
-# room for error.  On Linux the default is usually 64.
-requires test `ulimit -l` -gt 8
+# least 10 (kilobytes) and we actually check it's a bit larger to
+# allow room for error.  On Linux the default is usually 64.
+requires test `ulimit -l` -gt 16
 
 sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
 files="memory-allocator-malloc-mlock.pid $sock"
@@ -57,17 +57,21 @@ cleanup_fn rm -f $files
 
 # Run nbdkit with memory plugin.
 start_nbdkit -P memory-allocator-malloc-mlock.pid -U $sock \
-             memory 2048 allocator=malloc,mlock=true
+             memory 10240 allocator=malloc,mlock=true
 
 nbdsh --connect "nbd+unix://?socket=$sock" \
       -c '
-# Write some stuff to the beginning, middle and end.
+# Write some stuff.
 buf1 = b"1" * 512
 h.pwrite(buf1, 0)
 buf2 = b"2" * 512
 h.pwrite(buf2, 1024)
 buf3 = b"3" * 512
 h.pwrite(buf3, 1536)
+buf4 = b"4" * 512
+h.pwrite(buf4, 4096)
+buf5 = b"5" * 1024
+h.pwrite(buf5, 10240-len(buf5))
 
 # Read it back.
 buf11 = h.pread(len(buf1), 0)
@@ -76,4 +80,8 @@ buf22 = h.pread(len(buf2), 1024)
 assert buf2 == buf22
 buf33 = h.pread(len(buf3), 1536)
 assert buf3 == buf33
+buf44 = h.pread(len(buf4), 4096)
+assert buf4 == buf44
+buf55 = h.pread(len(buf5), 10240-len(buf5))
+assert buf5 == buf55
 '
-- 
2.32.0
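To see why 10K exercises the rounding path, a quick sketch of the round-up arithmetic (my own example, not part of the test; it assumes 4K and 64K page sizes):

  #include <stdio.h>

  /* Round a byte count up to a whole number of pages. */
  static unsigned long
  round_up_to_page (unsigned long bytes, unsigned long pagesize)
  {
    return (bytes + pagesize - 1) & ~(pagesize - 1);
  }

  int
  main (void)
  {
    printf ("%lu\n", round_up_to_page (10240, 4096));   /* 12288: a partial third page */
    printf ("%lu\n", round_up_to_page (10240, 65536));  /* 65536: one whole 64K page */
    return 0;
  }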
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 3/4] common/utils: Add vector_reserve_page_aligned
Allow vectors to be page aligned (eg. for performance or page protection reasons).  To create a page aligned vector you have to use <vector>_reserve_page_aligned instead of plain <vector>_reserve.

Note that <vector>_duplicate creates unaligned duplicates.
---
 configure.ac          |  4 ++-
 common/utils/vector.h | 12 +++++++
 common/utils/vector.c | 82 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/configure.ac b/configure.ac
index c426aec8..1ab85e3e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -384,7 +384,9 @@ AC_CHECK_FUNCS([\
         pipe \
         pipe2 \
         ppoll \
-        posix_fadvise])
+        posix_fadvise \
+        posix_memalign \
+        valloc])
 
 dnl Check for structs and members.
 AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [[#include <dirent.h>]])
diff --git a/common/utils/vector.h b/common/utils/vector.h
index 1d04f812..50dd895d 100644
--- a/common/utils/vector.h
+++ b/common/utils/vector.h
@@ -101,6 +101,15 @@
                                     sizeof (type));                    \
   }                                                                    \
                                                                        \
+  /* Same as _reserve, but the allocation will be page aligned.        \
+   */                                                                  \
+  static inline int                                                    \
+  name##_reserve_page_aligned (name *v, size_t n)                      \
+  {                                                                    \
+    return generic_vector_reserve_page_aligned ((struct generic_vector *)v, \
+                                                n, sizeof (type));     \
+  }                                                                    \
+                                                                       \
   /* Insert at i'th element.  i=0 => beginning  i=len => append */     \
   static inline int                                                    \
   name##_insert (name *v, type elem, size_t i)                         \
@@ -197,4 +206,7 @@ struct generic_vector {
 extern int generic_vector_reserve (struct generic_vector *v,
                                    size_t n, size_t itemsize);
 
+extern int generic_vector_reserve_page_aligned (struct generic_vector *v,
+                                                size_t n, size_t itemsize);
+
 #endif /* NBDKIT_VECTOR_H */
diff --git a/common/utils/vector.c b/common/utils/vector.c
index 54e6b810..8f0c33b1 100644
--- a/common/utils/vector.c
+++ b/common/utils/vector.c
@@ -34,15 +34,17 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
+#include <assert.h>
 
 #include "checked-overflow.h"
 #include "vector.h"
 
-int
-generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+static int
+calculate_capacity (struct generic_vector *v, size_t n, size_t itemsize,
+                    size_t *newcap_r, size_t *newbytes_r)
 {
-  void *newptr;
   size_t reqcap, reqbytes, newcap, newbytes, t;
 
   /* New capacity requested.  We must allocate this minimum (or fail).
@@ -71,6 +73,20 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
     newbytes = reqbytes;
   }
 
+  *newcap_r = newcap;
+  *newbytes_r = newbytes;
+  return 0;
+}
+
+int
+generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
+{
+  void *newptr;
+  size_t newcap, newbytes;
+
+  if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+    return -1;
+
   newptr = realloc (v->ptr, newbytes);
   if (newptr == NULL)
     return -1;
@@ -79,3 +95,63 @@ generic_vector_reserve (struct generic_vector *v, size_t n, size_t itemsize)
   v->cap = newcap;
   return 0;
 }
+
+/* Always allocates a buffer which is page aligned (both base and size). */
+int
+generic_vector_reserve_page_aligned (struct generic_vector *v,
+                                     size_t n, size_t itemsize)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+  int r;
+#endif
+  void *newptr;
+  size_t newcap, newbytes;
+  long pagesize;
+  size_t extra, extra_items;
+
+  pagesize = sysconf (_SC_PAGE_SIZE);
+  assert (pagesize > 0);
+
+  if (calculate_capacity (v, n, itemsize, &newcap, &newbytes) == -1)
+    return -1;
+
+  /* If the new size (in bytes) is not a full page then we have to
+   * round up to the next page.  Note we make the implicit assumption
+   * here that itemsize is divisible by pagesize, but I assume you
+   * know what you're doing if you're using this function.
+   */
+  extra = newbytes & (pagesize - 1);
+  if (extra > 0) {
+    extra_items = (pagesize - extra) / itemsize;
+
+    if (ADD_OVERFLOW (newcap, extra_items, &newcap) ||
+        ADD_OVERFLOW (newbytes, extra_items * itemsize, &newbytes)) {
+      errno = ENOMEM;
+      return -1;
+    }
+  }
+
+#ifdef HAVE_POSIX_MEMALIGN
+  if ((r = posix_memalign (&newptr, pagesize, newbytes)) != 0) {
+    errno = r;
+    return -1;
+  }
+#elif HAVE_VALLOC
+  newptr = valloc (newbytes);
+  if (newptr == NULL)
+    return -1;
+#else
+#error "this platform does not have posix_memalign or valloc"
+#endif
+
+  /* NB: reserve always makes the buffer larger so we don't have to
+   * deal with the case of a shrinking buffer here.  Also reserve
+   * (like the underlying realloc) does not zero the reserved
+   * elements.  So we're just copying over the old elements here.
+   */
+  memcpy (newptr, v->ptr, v->len * itemsize);
+  free (v->ptr);
+  v->ptr = newptr;
+  v->cap = newcap;
+  return 0;
+}
-- 
2.32.0
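To make the rounding step in generic_vector_reserve_page_aligned concrete, here is a small standalone sketch of the same calculation with example numbers (the values are mine, not from the patch; it assumes a 4K page size and an itemsize that divides the page size, as the comment above requires):

  #include <stdio.h>
  #include <stddef.h>

  int
  main (void)
  {
    size_t itemsize = 8;                  /* e.g. a vector of uint64_t */
    size_t pagesize = 4096;
    size_t newcap = 1000;                 /* capacity chosen by calculate_capacity */
    size_t newbytes = newcap * itemsize;  /* 8000 bytes: not a whole page */

    size_t extra = newbytes & (pagesize - 1);              /* 8000 % 4096 = 3904 */
    if (extra > 0) {
      size_t extra_items = (pagesize - extra) / itemsize;  /* 192 / 8 = 24 */
      newcap += extra_items;                               /* 1024 items */
      newbytes += extra_items * itemsize;                  /* 8192 = two whole pages */
    }

    printf ("cap=%zu bytes=%zu\n", newcap, newbytes);
    return 0;
  }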
Richard W.M. Jones
2022-Jan-27 16:39 UTC
[Libguestfs] [PATCH nbdkit v2 4/4] common/allocators: Always align mlock buffer
On platforms with 64K page sizes (RHEL + aarch64 or ppc64le) we have hit a problem in test-memory-allocator-malloc-mlock.sh which appears to be caused by:

1. The test requests a 2K mlocked buffer.

2. The allocated buffer sometimes straddles a page boundary.

3. Linux expands the mlock request to two pages, i.e. the requested size becomes 2 * 64K = 128K.

4. The standard mlock limit on Linux is 64K so the mlock request is rejected and the test fails.

Also POSIX permits the implementation to require that mlock parameters are page aligned (although Linux does not require this).

This commit attempts to fix both problems by ensuring the mlock request is always page aligned, using the new vector_reserve_page_aligned function.

Thanks: Laszlo Ersek
---
 common/allocators/malloc.c | 53 ++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/common/allocators/malloc.c b/common/allocators/malloc.c
index 9a2dd549..19df666a 100644
--- a/common/allocators/malloc.c
+++ b/common/allocators/malloc.c
@@ -81,11 +81,32 @@ m_alloc_free (struct allocator *a)
   }
 }
 
-/* Extend the underlying bytearray if needed.  mlock if requested. */
+/* Extend the underlying bytearray if needed. */
 static int
-extend (struct m_alloc *ma, uint64_t new_size)
+extend_without_mlock (struct m_alloc *ma, uint64_t new_size)
+{
+  size_t old_size, n;
+
+  if (ma->ba.cap < new_size) {
+    old_size = ma->ba.cap;
+    n = new_size - ma->ba.cap;
+
+    if (bytearray_reserve (&ma->ba, n) == -1) {
+      nbdkit_error ("realloc: %m");
+      return -1;
+    }
+
+    /* Initialize the newly allocated memory to 0. */
+    memset (ma->ba.ptr + old_size, 0, n);
+  }
+
+  return 0;
+}
+
+#ifdef HAVE_MLOCK
+static int
+extend_with_mlock (struct m_alloc *ma, uint64_t new_size)
 {
-  ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
   size_t old_size, n;
 
   if (ma->ba.cap < new_size) {
@@ -100,7 +121,7 @@ extend (struct m_alloc *ma, uint64_t new_size)
       munlock (ma->ba.ptr, ma->ba.cap);
 #endif
 
-    if (bytearray_reserve (&ma->ba, n) == -1) {
+    if (bytearray_reserve_page_aligned (&ma->ba, n) == -1) {
       nbdkit_error ("realloc: %m");
       return -1;
     }
@@ -108,18 +129,28 @@ extend (struct m_alloc *ma, uint64_t new_size)
     /* Initialize the newly allocated memory to 0. */
     memset (ma->ba.ptr + old_size, 0, n);
 
-#ifdef HAVE_MLOCK
-    if (ma->use_mlock) {
-      if (mlock (ma->ba.ptr, ma->ba.cap) == -1) {
-        nbdkit_error ("allocator=malloc: mlock: %m");
-        return -1;
-      }
+    if (mlock (ma->ba.ptr, ma->ba.cap) == -1) {
+      nbdkit_error ("allocator=malloc: mlock: %m");
+      return -1;
     }
-#endif
   }
 
   return 0;
 }
+#endif /* HAVE_MLOCK */
+
+static int
+extend (struct m_alloc *ma, uint64_t new_size)
+{
+  ACQUIRE_WRLOCK_FOR_CURRENT_SCOPE (&ma->lock);
+
+#ifdef HAVE_MLOCK
+  if (ma->use_mlock)
+    return extend_with_mlock (ma, new_size);
+#endif
+
+  return extend_without_mlock (ma, new_size);
+}
 
 static int
 m_alloc_set_size_hint (struct allocator *a, uint64_t size_hint)
-- 
2.32.0
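A rough sketch of the arithmetic behind steps 2-4 (not nbdkit code; the addresses and the 64K page size are made up for illustration):

  #include <stdio.h>

  /* Bytes that mlock() effectively locks for a buffer at `addr' of
   * `len' bytes: the kernel rounds the range out to whole pages.
   */
  static unsigned long
  locked_bytes (unsigned long addr, unsigned long len, unsigned long pagesize)
  {
    unsigned long start = addr & ~(pagesize - 1);
    unsigned long end = (addr + len + pagesize - 1) & ~(pagesize - 1);
    return end - start;
  }

  int
  main (void)
  {
    /* 2K buffer that fits inside a single 64K page: locks 64K. */
    printf ("%lu\n", locked_bytes (0x10000, 2048, 65536));

    /* 2K buffer straddling a 64K page boundary: locks 128K, which is
     * over the usual 64K RLIMIT_MEMLOCK default, so mlock fails.
     */
    printf ("%lu\n", locked_bytes (0x1fc00, 2048, 65536));
    return 0;
  }

Page-aligning both the base and the size of the allocation (patch 3) removes the straddling case entirely.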