Nir Soffer
2018-Aug-03  19:28 UTC
[Libguestfs] [PATCH v2 0/4] file: Zero for block devices and older file systems
This is the third version to support efficient zero for block devices on older kernels (e.g. RHEL 7.5), and file systems that do not support yet FALLOC_FS_ZERO_RANGE (e.g. NFS 4.2). Changes since v2: - Revert file_can_trim change, since it is too late to change the value after negotiation. Changing the capability dinamically may be useful internally, but it should be done via other means. - Do not depend on FALLOC_FL_* when including <linux/fs.h>. - Add common/includes/isaligned.h for is_aligned helper, implemented in a more efficient way with bitwise math. - If getting sector size fail, fall back to safe guess instead of hard failure. - More efficient alignment check using bitwise math. - For BLKZEROOUT, treat ENOTTY as EOPNOTSUPP. Theoretically possible with a mix of new headers and old or strangely configured kernel. - Use default multi line comment style. - Fix few typos in comments and commit message Issues to explore later: - Eric suggested to use the new FALLOC_FL_NO_HIDE_STALE. Requires benchmarking with a system supporting this flag. - Eric suggested to use tri-state for can_* flags. I don't see a need at this point. - Eric suggested to add can_zero. I'm not sure about the semantics of this, and it has the same issue of can_trim, reporting dynamic value. v2 was here: https://www.redhat.com/archives/libguestfs/2018-August/msg00025.html Changes since v1: - Split to smaller patches - Skip linux only includes on other systems - Skip code using BLKZEROOUT if the macro is not defined - Try BLKZEROOUT only if the offset and count are aligned to device sector size. - initialize h->can_* properly. Before they were uninitialized if FALLOC_FL_* macros were not defined. - Use new h->can_punch_hole in file_can_trim, so now we report the actual capability once we detected it. - Use h->can_punch_hole in file_trim, so we try only once if the operation is not supported. v1 was here: https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html Nir Soffer (4): file: Avoid unsupported fallocate() calls file: Support zero without ZERO_RANGE common: Add isaligned helper module file: Zero for block devices on old kernels common/include/isaligned.h | 50 ++++++++++ plugins/file/Makefile.am | 3 +- plugins/file/file.c | 182 +++++++++++++++++++++++++++++-------- 3 files changed, 196 insertions(+), 39 deletions(-) create mode 100644 common/include/isaligned.h -- 2.17.1
Nir Soffer
2018-Aug-03  19:28 UTC
[Libguestfs] [PATCH v2 1/4] file: Avoid unsupported fallocate() calls
When using file systems not supporting ZERO_RANGE (e.g. NFS 4.2) or
block device on kernel < 4.9, we used to call fallocate() for every
zero, fail with EOPNOTSUPP, and fallback to manual zeroing.  When
trimming, we used to try unsupported fallocate() on every call.
Change file handle to remember if punching holes or zeroing range are
supported, and avoid unsupported calls.
- zero changed to:
  1. If we can punch hole and may trim, try PUNCH_HOLE
  2. If we can zero range, try ZERO_RANGE
  3. Fall back to manual writing
- trim changed to:
  1. If we can punch hole, try PUNCH_HOLE
  2. Succeed
---
 plugins/file/file.c | 80 ++++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 27 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 3bb4d17..5daab63 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -33,6 +33,7 @@
 
 #include <config.h>
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -118,6 +119,8 @@ file_config_complete (void)
 /* The per-connection handle. */
 struct handle {
   int fd;
+  bool can_punch_hole;
+  bool can_zero_range;
 };
 
 /* Create the per-connection handle. */
@@ -146,6 +149,18 @@ file_open (int readonly)
     return NULL;
   }
 
+#ifdef FALLOC_FL_PUNCH_HOLE
+  h->can_punch_hole = true;
+#else
+  h->can_punch_hole = false;
+#endif
+
+#ifdef FALLOC_FL_ZERO_RANGE
+  h->can_zero_range = true;
+#else
+  h->can_zero_range = false;
+#endif
+
   return h;
 }
 
@@ -252,34 +267,42 @@ file_pwrite (void *handle, const void *buf, uint32_t
count, uint64_t offset)
 static int
 file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim)
 {
-#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
   struct handle *h = handle;
-#endif
   int r = -1;
 
 #ifdef FALLOC_FL_PUNCH_HOLE
-  if (may_trim) {
+  if (h->can_punch_hole && may_trim) {
     r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                       offset, count);
-    if (r == -1 && errno != EOPNOTSUPP) {
+    if (r == 0)
+      return r;
+
+    if (errno != EOPNOTSUPP) {
       nbdkit_error ("zero: %m");
+      return r;
     }
-    /* PUNCH_HOLE is older; if it is not supported, it is likely that
-       ZERO_RANGE will not work either, so fall back to write. */
-    return r;
+
+    h->can_punch_hole = false;
   }
 #endif
 
 #ifdef FALLOC_FL_ZERO_RANGE
-  r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
-  if (r == -1 && errno != EOPNOTSUPP) {
-    nbdkit_error ("zero: %m");
+  if (h->can_zero_range) {
+    r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+    if (r== 0)
+      return r;
+
+    if (errno != EOPNOTSUPP) {
+      nbdkit_error ("zero: %m");
+      return r;
+    }
+
+    h->can_zero_range = false;
   }
-#else
-  /* Trigger a fall back to writing */
-  errno = EOPNOTSUPP;
 #endif
 
+  /* Trigger a fall back to writing */
+  errno = EOPNOTSUPP;
   return r;
 }
 
@@ -301,27 +324,30 @@ file_flush (void *handle)
 static int
 file_trim (void *handle, uint32_t count, uint64_t offset)
 {
-  int r = -1;
 #ifdef FALLOC_FL_PUNCH_HOLE
   struct handle *h = handle;
+  int r = -1;
+
+  if (h->can_punch_hole) {
+    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == -1) {
+      /* Trim is advisory; we don't care if it fails for anything other
+       * than EIO or EPERM. */
+      if (errno == EPERM || errno == EIO) {
+        nbdkit_error ("fallocate: %m");
+        return r;
+      }
+
+      if (errno == EOPNOTSUPP)
+        h->can_punch_hole = false;
 
-  /* Trim is advisory; we don't care if it fails for anything other
-   * than EIO or EPERM. */
-  r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                    offset, count);
-  if (r < 0) {
-    if (errno != EPERM && errno != EIO) {
       nbdkit_debug ("ignoring failed fallocate during trim: %m");
-      r = 0;
     }
-    else
-      nbdkit_error ("fallocate: %m");
   }
-#else
-  /* Based on .can_trim, this should not be reached. */
-  errno = EOPNOTSUPP;
 #endif
-  return r;
+
+  return 0;
 }
 
 static struct nbdkit_plugin plugin = {
-- 
2.17.1
Nir Soffer
2018-Aug-03  19:28 UTC
[Libguestfs] [PATCH v2 2/4] file: Support zero without ZERO_RANGE
File systems not supporting FALLOC_FL_ZERO_RANGE yet fall back to manual
zeroing.
We can avoid this by combining two fallocate calls:
    fallocate(FALLOC_FL_PUNCH_HOLE)
    fallocate(0)
Based on my tests this is much more efficient compared to manual
zeroing. The idea came from this qemu patch:
https://github.com/qemu/qemu/commit/1cdc3239f1bb
Here is an example run with NFS 4.2 without this change, converting
fedora 27 image created with virt-builder:
$ export SOCK=/tmp/nbd.sock
$ export FILE=/nfs-mount/fedora-27.img
$ src/nbdkit plugins/file/.libs/nbdkit-file-plugin.so file=$FILE -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m17.481s
user	0m0.199s
sys	0m0.691s
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m17.072s
user	0m0.191s
sys	0m0.738s
With this change:
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m6.285s
user	0m0.217s
sys	0m0.829s
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m3.967s
user	0m0.193s
sys	0m0.702s
Note: the image is sparse, but nbdkit creates a fully allocated image.
This may be a bug in nbdkit or qemu-img.
---
 plugins/file/file.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 5daab63..a2cea4a 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -121,6 +121,7 @@ struct handle {
   int fd;
   bool can_punch_hole;
   bool can_zero_range;
+  bool can_fallocate;
 };
 
 /* Create the per-connection handle. */
@@ -161,6 +162,8 @@ file_open (int readonly)
   h->can_zero_range = false;
 #endif
 
+  h->can_fallocate = true;
+
   return h;
 }
 
@@ -301,6 +304,35 @@ file_zero (void *handle, uint32_t count, uint64_t offset,
int may_trim)
   }
 #endif
 
+#ifdef FALLOC_FL_PUNCH_HOLE
+  /* If we can punch hole but may not trim, we can combine punching hole and
+   * fallocate to zero a range. This is expected to be more efficient than
+   * writing zeros manually. */
+  if (h->can_punch_hole && h->can_fallocate) {
+    r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                      offset, count);
+    if (r == 0) {
+      r = do_fallocate(h->fd, 0, offset, count);
+      if (r == 0)
+        return r;
+
+      if (errno != EOPNOTSUPP) {
+        nbdkit_error ("zero: %m");
+        return r;
+      }
+
+      h->can_fallocate = false;
+    } else {
+      if (errno != EOPNOTSUPP) {
+        nbdkit_error ("zero: %m");
+        return r;
+      }
+
+      h->can_punch_hole = false;
+    }
+  }
+#endif
+
   /* Trigger a fall back to writing */
   errno = EOPNOTSUPP;
   return r;
-- 
2.17.1
Nir Soffer
2018-Aug-03  19:28 UTC
[Libguestfs] [PATCH v2 3/4] common: Add isaligned helper module
is_aligned (size, align) returns true if size is aligned to align,
assuming that align is power of 2.
Suggested by Eric in
https://www.redhat.com/archives/libguestfs/2018-August/msg00036.html
---
 common/include/isaligned.h | 50 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 common/include/isaligned.h
diff --git a/common/include/isaligned.h b/common/include/isaligned.h
new file mode 100644
index 0000000..468b624
--- /dev/null
+++ b/common/include/isaligned.h
@@ -0,0 +1,50 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS
IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_ISALIGNED_H
+#define NBDKIT_ISALIGNED_H
+
+#include <stdbool.h>
+
+/* Return true if size is a multiple of align. align must be power of 2.
+ *
+ * Suggested by Eric Blake.  See:
+ * https://www.redhat.com/archives/libguestfs/2018-August/msg00036.html
+ */
+static inline bool
+is_aligned (unsigned int size, unsigned int align)
+{
+  return !(size & (align - 1));
+}
+
+#endif /* NBDKIT_ISALIGNED_H */
-- 
2.17.1
Nir Soffer
2018-Aug-03  19:28 UTC
[Libguestfs] [PATCH v2 4/4] file: Zero for block devices on old kernels
fallocate(FALLOC_FL_ZERO_RANGE) is supportd for block devices with
modern kernel, but when it is not, we fall back to manual zeroing.
For block device, try also to use ioctl(BLKZEROOUT) if offset and count
are aligned to block device sector size.
Here is an example run without this change on RHEL 7.5:
$ export SOCK=/tmp/nbd.sock
$ export
BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15
$ src/nbdkit plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U $SOCK
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m11.563s
user	0m0.219s
sys	0m0.746s
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m9.841s
user	0m0.167s
sys	0m0.715s
With this change:
$ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m2.796s
user	0m0.202s
sys	0m0.700s
$ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img
nbd:unix:/tmp/nbd.sock
real	0m1.908s
user	0m0.166s
sys	0m0.728s
---
 plugins/file/Makefile.am |  3 +-
 plugins/file/file.c      | 70 +++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 12 deletions(-)
diff --git a/plugins/file/Makefile.am b/plugins/file/Makefile.am
index 00eaf59..91adbcb 100644
--- a/plugins/file/Makefile.am
+++ b/plugins/file/Makefile.am
@@ -41,7 +41,8 @@ nbdkit_file_plugin_la_SOURCES = \
 	$(top_srcdir)/include/nbdkit-plugin.h
 
 nbdkit_file_plugin_la_CPPFLAGS = \
-	-I$(top_srcdir)/include
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/common/include
 nbdkit_file_plugin_la_CFLAGS = \
 	$(WARNINGS_CFLAGS)
 nbdkit_file_plugin_la_LDFLAGS = \
diff --git a/plugins/file/file.c b/plugins/file/file.c
index a2cea4a..6c8c78e 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -41,14 +41,21 @@
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/ioctl.h>
 #include <errno.h>
 
 #if defined(__linux__) && !defined(FALLOC_FL_PUNCH_HOLE)
 #include <linux/falloc.h>   /* For FALLOC_FL_*, glibc < 2.18 */
 #endif
 
+#if defined(__linux__)
+#include <linux/fs.h>       /* For BLKZEROOUT */
+#endif
+
 #include <nbdkit-plugin.h>
 
+#include "isaligned.h"
+
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
 #endif
@@ -119,9 +126,12 @@ file_config_complete (void)
 /* The per-connection handle. */
 struct handle {
   int fd;
+  bool is_block_device;
+  int sector_size;
   bool can_punch_hole;
   bool can_zero_range;
   bool can_fallocate;
+  bool can_zeroout;
 };
 
 /* Create the per-connection handle. */
@@ -129,6 +139,7 @@ static void *
 file_open (int readonly)
 {
   struct handle *h;
+  struct stat statbuf;
   int flags;
 
   h = malloc (sizeof *h);
@@ -150,6 +161,22 @@ file_open (int readonly)
     return NULL;
   }
 
+  if (fstat (h->fd, &statbuf) == -1) {
+    nbdkit_error ("fstat: %s: %m", filename);
+    free (h);
+    return NULL;
+  }
+
+  h->is_block_device = S_ISBLK(statbuf.st_mode);
+  h->sector_size = 4096;  /* Start with safe guess */
+
+#ifdef BLKSSZGET
+  if (h->is_block_device) {
+    if (ioctl (h->fd, BLKSSZGET, &h->sector_size))
+      nbdkit_debug ("cannot get sector size: %s: %m", filename);
+  }
+#endif
+
 #ifdef FALLOC_FL_PUNCH_HOLE
   h->can_punch_hole = true;
 #else
@@ -163,6 +190,7 @@ file_open (int readonly)
 #endif
 
   h->can_fallocate = true;
+  h->can_zeroout = h->is_block_device;
 
   return h;
 }
@@ -184,27 +212,29 @@ static int64_t
 file_get_size (void *handle)
 {
   struct handle *h = handle;
-  struct stat statbuf;
 
-  if (fstat (h->fd, &statbuf) == -1) {
-    nbdkit_error ("stat: %m");
-    return -1;
-  }
-
-  if (S_ISBLK (statbuf.st_mode)) {
+  if (h->is_block_device) {
+    /* Block device, so st_size will not be the true size. */
     off_t size;
 
-    /* Block device, so st_size will not be the true size. */
     size = lseek (h->fd, 0, SEEK_END);
     if (size == -1) {
       nbdkit_error ("lseek (to find device size): %m");
       return -1;
     }
+
     return size;
-  }
+  } else {
+    /* Regular file. */
+    struct stat statbuf;
 
-  /* Else regular file. */
-  return statbuf.st_size;
+    if (fstat (h->fd, &statbuf) == -1) {
+      nbdkit_error ("fstat: %m");
+      return -1;
+    }
+
+    return statbuf.st_size;
+  }
 }
 
 static int
@@ -333,6 +363,24 @@ file_zero (void *handle, uint32_t count, uint64_t offset,
int may_trim)
   }
 #endif
 
+#ifdef BLKZEROOUT
+  /* For aligned range and block device, we can use BLKZEROOUT. */
+  if (h->can_zeroout && is_aligned (offset | count,
h->sector_size)) {
+    uint64_t range[2] = {offset, count};
+
+    r = ioctl (h->fd, BLKZEROOUT, &range);
+    if (r == 0)
+      return r;
+
+    if (errno != ENOTTY) {
+      nbdkit_error ("zero: %m");
+      return r;
+    }
+
+    h->can_zeroout = false;
+  }
+#endif
+
   /* Trigger a fall back to writing */
   errno = EOPNOTSUPP;
   return r;
-- 
2.17.1
Nir Soffer
2018-Aug-13  15:50 UTC
Re: [Libguestfs] [PATCH v2 0/4] file: Zero for block devices and older file systems
On Fri, Aug 3, 2018 at 10:28 PM Nir Soffer <nirsof@gmail.com> wrote:> This is the third version to support efficient zero for block devices > on older kernels (e.g. RHEL 7.5), and file systems that do not support > yet FALLOC_FS_ZERO_RANGE (e.g. NFS 4.2). > > Changes since v2: > - Revert file_can_trim change, since it is too late to change the value > after negotiation. Changing the capability dinamically may be useful > internally, but it should be done via other means. > - Do not depend on FALLOC_FL_* when including <linux/fs.h>. > - Add common/includes/isaligned.h for is_aligned helper, implemented > in a more efficient way with bitwise math. > - If getting sector size fail, fall back to safe guess instead of > hard failure. > - More efficient alignment check using bitwise math. > - For BLKZEROOUT, treat ENOTTY as EOPNOTSUPP. Theoretically possible > with a mix of new headers and old or strangely configured kernel. > - Use default multi line comment style. > - Fix few typos in comments and commit message > > Issues to explore later: > - Eric suggested to use the new FALLOC_FL_NO_HIDE_STALE. Requires > benchmarking with a system supporting this flag. > - Eric suggested to use tri-state for can_* flags. I don't see a need at > this point. > - Eric suggested to add can_zero. I'm not sure about the semantics of > this, and it has the same issue of can_trim, reporting dynamic value. > > v2 was here: > https://www.redhat.com/archives/libguestfs/2018-August/msg00025.html > > Changes since v1: > - Split to smaller patches > - Skip linux only includes on other systems > - Skip code using BLKZEROOUT if the macro is not defined > - Try BLKZEROOUT only if the offset and count are aligned to device > sector size. > - initialize h->can_* properly. Before they were uninitialized if > FALLOC_FL_* macros were not defined. > - Use new h->can_punch_hole in file_can_trim, so now we report the > actual capability once we detected it. > - Use h->can_punch_hole in file_trim, so we try only once if the > operation is not supported. > > v1 was here: > https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html > > Nir Soffer (4): > file: Avoid unsupported fallocate() calls > file: Support zero without ZERO_RANGE > common: Add isaligned helper module > file: Zero for block devices on old kernels > > common/include/isaligned.h | 50 ++++++++++ > plugins/file/Makefile.am | 3 +- > plugins/file/file.c | 182 +++++++++++++++++++++++++++++-------- > 3 files changed, 196 insertions(+), 39 deletions(-) > create mode 100644 common/include/isaligned.h > > -- > 2.17.1 > >Ping?
Eric Blake
2018-Aug-13  17:44 UTC
Re: [Libguestfs] [PATCH v2 1/4] file: Avoid unsupported fallocate() calls
On 08/03/2018 02:28 PM, Nir Soffer wrote:> When using file systems not supporting ZERO_RANGE (e.g. NFS 4.2) or > block device on kernel < 4.9, we used to call fallocate() for every > zero, fail with EOPNOTSUPP, and fallback to manual zeroing. When > trimming, we used to try unsupported fallocate() on every call. > > Change file handle to remember if punching holes or zeroing range are > supported, and avoid unsupported calls. > > - zero changed to: > 1. If we can punch hole and may trim, try PUNCH_HOLE > 2. If we can zero range, try ZERO_RANGE > 3. Fall back to manual writing > > - trim changed to: > 1. If we can punch hole, try PUNCH_HOLE > 2. Succeed > --- > plugins/file/file.c | 80 ++++++++++++++++++++++++++++++--------------- > 1 file changed, 53 insertions(+), 27 deletions(-) >> #ifdef FALLOC_FL_ZERO_RANGE > - r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); > - if (r == -1 && errno != EOPNOTSUPP) { > - nbdkit_error ("zero: %m"); > + if (h->can_zero_range) { > + r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); > + if (r== 0)Spacing is off. Otherwise looks good to me. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Eric Blake
2018-Aug-13  17:52 UTC
Re: [Libguestfs] [PATCH v2 2/4] file: Support zero without ZERO_RANGE
On 08/03/2018 02:28 PM, Nir Soffer wrote:> File systems not supporting FALLOC_FL_ZERO_RANGE yet fall back to manual > zeroing. > > We can avoid this by combining two fallocate calls: > > fallocate(FALLOC_FL_PUNCH_HOLE) > fallocate(0) > > Based on my tests this is much more efficient compared to manual > zeroing. The idea came from this qemu patch: > https://github.com/qemu/qemu/commit/1cdc3239f1bb >> > Note: the image is sparse, but nbdkit creates a fully allocated image. > This may be a bug in nbdkit or qemu-img.Calling fallocate(0) forces allocation; so anything explicitly written to 0 won't be sparse when this mode is used. There's also a question of whether your source file accurately reports holes to begin with (poor tmpfs SEEK_HOLE performance is still a common problem). But I don't see that as getting in the way of this patch going in.> --- > plugins/file/file.c | 32 ++++++++++++++++++++++++++++++++ > 1 file changed, 32 insertions(+) >LGTM -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Eric Blake
2018-Aug-13  17:54 UTC
Re: [Libguestfs] [PATCH v2 3/4] common: Add isaligned helper module
On 08/03/2018 02:28 PM, Nir Soffer wrote:> is_aligned (size, align) returns true if size is aligned to align, > assuming that align is power of 2. > > Suggested by Eric in > https://www.redhat.com/archives/libguestfs/2018-August/msg00036.html > --- > common/include/isaligned.h | 50 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 50 insertions(+) > create mode 100644 common/include/isaligned.h >> +/* Return true if size is a multiple of align. align must be power of 2. > + * > + * Suggested by Eric Blake. See: > + * https://www.redhat.com/archives/libguestfs/2018-August/msg00036.htmlDo we need this comment in the code? I'm fine if it is just in the commit message.> + */ > +static inline bool > +is_aligned (unsigned int size, unsigned int align) > +{ > + return !(size & (align - 1)); > +}Should we assert() that align is indeed a power of 2, to make it harder for callers to misuse this function? Otherwise, looks okay to me. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Eric Blake
2018-Aug-13  18:00 UTC
Re: [Libguestfs] [PATCH v2 4/4] file: Zero for block devices on old kernels
On 08/03/2018 02:28 PM, Nir Soffer wrote:> fallocate(FALLOC_FL_ZERO_RANGE) is supportd for block devices withs/supportd/supported/> modern kernel, but when it is not, we fall back to manual zeroing. > > For block device, try also to use ioctl(BLKZEROOUT) if offset and count > are aligned to block device sector size. > > Here is an example run without this change on RHEL 7.5: >> +++ b/plugins/file/file.c > @@ -41,14 +41,21 @@ > #include <unistd.h> > #include <sys/types.h> > #include <sys/stat.h> > +#include <sys/ioctl.h>Linux-specific header; will it cause grief on BSD compilation? (POSIX declares ioctl() in <stropts.h>, but for the obsolete STREAMS extension that no one but Solaris ever implemented, and which no one uses today - and thus which has little bearing on the Linux use of ioctl). Otherwise looks okay. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Nir Soffer
2018-Aug-18  20:12 UTC
Re: [Libguestfs] [PATCH v2 2/4] file: Support zero without ZERO_RANGE
On Fri, Aug 3, 2018 at 10:28 PM Nir Soffer <nirsof@gmail.com> wrote:> File systems not supporting FALLOC_FL_ZERO_RANGE yet fall back to manual > zeroing. > > We can avoid this by combining two fallocate calls: > > fallocate(FALLOC_FL_PUNCH_HOLE) > fallocate(0) > > Based on my tests this is much more efficient compared to manual > zeroing. The idea came from this qemu patch: > https://github.com/qemu/qemu/commit/1cdc3239f1bb > > Here is an example run with NFS 4.2 without this change, converting > fedora 27 image created with virt-builder: > > $ export SOCK=/tmp/nbd.sock > $ export FILE=/nfs-mount/fedora-27.img > $ src/nbdkit plugins/file/.libs/nbdkit-file-plugin.so file=$FILE -U $SOCK > > $ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img > nbd:unix:/tmp/nbd.sock > > real 0m17.481s > user 0m0.199s > sys 0m0.691s > > $ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img > nbd:unix:/tmp/nbd.sock > > real 0m17.072s > user 0m0.191s > sys 0m0.738s > > With this change: > > $ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img > nbd:unix:/tmp/nbd.sock > > real 0m6.285s > user 0m0.217s > sys 0m0.829s > > $ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img > nbd:unix:/tmp/nbd.sock > > real 0m3.967s > user 0m0.193s > sys 0m0.702s > > Note: the image is sparse, but nbdkit creates a fully allocated image. > This may be a bug in nbdkit or qemu-img. > --- > plugins/file/file.c | 32 ++++++++++++++++++++++++++++++++ > 1 file changed, 32 insertions(+) > > diff --git a/plugins/file/file.c b/plugins/file/file.c > index 5daab63..a2cea4a 100644 > --- a/plugins/file/file.c > +++ b/plugins/file/file.c > @@ -121,6 +121,7 @@ struct handle { > int fd; > bool can_punch_hole; > bool can_zero_range; > + bool can_fallocate; > }; > > /* Create the per-connection handle. */ > @@ -161,6 +162,8 @@ file_open (int readonly) > h->can_zero_range = false; > #endif > > + h->can_fallocate = true; > + > return h; > } > > @@ -301,6 +304,35 @@ file_zero (void *handle, uint32_t count, uint64_t > offset, int may_trim) > } > #endif > > +#ifdef FALLOC_FL_PUNCH_HOLE > + /* If we can punch hole but may not trim, we can combine punching hole > and > + * fallocate to zero a range. This is expected to be more efficient than > + * writing zeros manually. */ > + if (h->can_punch_hole && h->can_fallocate) { > + r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > + offset, count); > + if (r == 0) { > + r = do_fallocate(h->fd, 0, offset, count); >Space after the function name is missing here, fixing it in v3.> + if (r == 0) > + return r; > + > + if (errno != EOPNOTSUPP) { > + nbdkit_error ("zero: %m"); > + return r; > + } > + > + h->can_fallocate = false; > + } else { > + if (errno != EOPNOTSUPP) { > + nbdkit_error ("zero: %m"); > + return r; > + } > + > + h->can_punch_hole = false; > + } > + } > +#endif > + > /* Trigger a fall back to writing */ > errno = EOPNOTSUPP; > return r; > -- > 2.17.1 > >
Possibly Parallel Threads
- [PATCH v4 4/4] file: Zero for block devices on old kernels
- [PATCH nbdkit v2 1/3] file: Move file operators to a new common/fileops mini-library.
- [PATCH 3/3] file: Zero for block devices on old kernels
- [PATCH] file: Zero support for block devices and NFS 4.2
- [PATCH] file: Zero support for block devices and NFS 4.2