Nir Soffer
2021-Feb-21 01:09 UTC
[Libguestfs] [PATCH libnbd 1/2] nbdcopy: Do not use trim for zeroing
Current code uses NBD_CMD_TRIM for writing zeroes without allocating space (--allocate not specified). This incorrect and can lead to corrupting the target image. NBD manual warns about NBD_CMD_TRIM: After issuing this command, a client MUST NOT make any assumptions about the contents of the export affected by this command, until overwriting it again with NBD_CMD_WRITE or NBD_CMD_WRITE_ZEROES Add allocate flag to the zero operation, to allow zeroing by punching hole (default) or zeroing by allocating space. nbd-ops use NBD_CMD_WRITE_ZEROES with or without NBD_FLAG_NO_HOLE flag. file-ops use fallocate with PUNCH_HOLE or ZERO_RANGE flag. The copy-sparse tests was modified to require zero operation with may_trim flag. Signed-off-by: Nir Soffer <nsoffer at redhat.com> --- copy/copy-sparse.sh | 66 ++++++++++++++++++------------------- copy/file-ops.c | 49 +++++++++++++++++++-------- copy/multi-thread-copying.c | 20 ++++------- copy/nbd-ops.c | 8 ++--- copy/nbdcopy.h | 5 +-- copy/null-ops.c | 4 +-- copy/synch-copying.c | 8 ++--- 7 files changed, 88 insertions(+), 72 deletions(-) diff --git a/copy/copy-sparse.sh b/copy/copy-sparse.sh index 846767b..c43b41a 100755 --- a/copy/copy-sparse.sh +++ b/copy/copy-sparse.sh @@ -33,7 +33,7 @@ cleanup_fn rm -f $out # Copy from a sparse data disk to an nbdkit-eval-plugin instance which # is logging everything. This allows us to see exactly what nbdcopy -# is writing, to ensure it is writing and trimming the target as +# is writing, to ensure it is writing and zeroing the target as # expected. $VG nbdcopy -S 0 -- \ [ nbdkit --exit-with-parent data data=' @@ -60,38 +60,38 @@ if [ "$(cat $out)" != "pwrite 1 4294967296 pwrite 32768 0 pwrite 32768 1073709056 pwrite 32768 4294934528 -trim 134184960 32768 -trim 134184960 4160749568 -trim 134184960 939524096 -trim 134217728 1073741824 -trim 134217728 1207959552 -trim 134217728 134217728 -trim 134217728 1342177280 -trim 134217728 1476395008 -trim 134217728 1610612736 -trim 134217728 1744830464 -trim 134217728 1879048192 -trim 134217728 2013265920 -trim 134217728 2147483648 -trim 134217728 2281701376 -trim 134217728 2415919104 -trim 134217728 2550136832 -trim 134217728 268435456 -trim 134217728 2684354560 -trim 134217728 2818572288 -trim 134217728 2952790016 -trim 134217728 3087007744 -trim 134217728 3221225472 -trim 134217728 3355443200 -trim 134217728 3489660928 -trim 134217728 3623878656 -trim 134217728 3758096384 -trim 134217728 3892314112 -trim 134217728 402653184 -trim 134217728 4026531840 -trim 134217728 536870912 -trim 134217728 671088640 -trim 134217728 805306368" ]; then +zero 134184960 32768 may_trim +zero 134184960 4160749568 may_trim +zero 134184960 939524096 may_trim +zero 134217728 1073741824 may_trim +zero 134217728 1207959552 may_trim +zero 134217728 1342177280 may_trim +zero 134217728 134217728 may_trim +zero 134217728 1476395008 may_trim +zero 134217728 1610612736 may_trim +zero 134217728 1744830464 may_trim +zero 134217728 1879048192 may_trim +zero 134217728 2013265920 may_trim +zero 134217728 2147483648 may_trim +zero 134217728 2281701376 may_trim +zero 134217728 2415919104 may_trim +zero 134217728 2550136832 may_trim +zero 134217728 2684354560 may_trim +zero 134217728 268435456 may_trim +zero 134217728 2818572288 may_trim +zero 134217728 2952790016 may_trim +zero 134217728 3087007744 may_trim +zero 134217728 3221225472 may_trim +zero 134217728 3355443200 may_trim +zero 134217728 3489660928 may_trim +zero 134217728 3623878656 may_trim +zero 134217728 3758096384 may_trim +zero 134217728 3892314112 may_trim +zero 134217728 4026531840 may_trim +zero 134217728 402653184 may_trim +zero 134217728 536870912 may_trim +zero 134217728 671088640 may_trim +zero 134217728 805306368 may_trim" ]; then echo "$0: output does not match expected" exit 1 fi diff --git a/copy/file-ops.c b/copy/file-ops.c index 2a239d0..d0b9447 100644 --- a/copy/file-ops.c +++ b/copy/file-ops.c @@ -100,10 +100,9 @@ file_synch_write (struct rw *rw, } static bool -file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) +file_punch_hole(int fd, uint64_t offset, uint64_t count) { #ifdef FALLOC_FL_PUNCH_HOLE - int fd = rw->u.local.fd; int r; r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, @@ -113,17 +112,14 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) exit (EXIT_FAILURE); } return true; -#else /* !FALLOC_FL_PUNCH_HOLE */ - return false; #endif + return false; } static bool -file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +file_zero_range(int fd, uint64_t offset, uint64_t count) { - if (S_ISREG (rw->u.local.stat.st_mode)) { #ifdef FALLOC_FL_ZERO_RANGE - int fd = rw->u.local.fd; int r; r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count); @@ -133,11 +129,13 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) } return true; #endif - } - else if (S_ISBLK (rw->u.local.stat.st_mode) && - IS_ALIGNED (offset | count, rw->u.local.sector_size)) { + return false; +} + +static bool +file_zeroout(int fd, uint64_t offset, uint64_t count) +{ #ifdef BLKZEROOUT - int fd = rw->u.local.fd; int r; uint64_t range[2] = {offset, count}; @@ -148,6 +146,31 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) } return true; #endif + return false; +} + +static bool +file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) +{ + return file_punch_hole(rw->u.local.fd, offset, count); +} + +static bool +file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate) +{ + int fd = rw->u.local.fd; + + if (S_ISREG (rw->u.local.stat.st_mode)) { + if (allocate) { + return file_zero_range (fd, offset, count); + } else { + return file_punch_hole (fd, offset, count); + } + } + else if (S_ISBLK (rw->u.local.stat.st_mode) && + IS_ALIGNED (offset | count, rw->u.local.sector_size)) { + /* Always allocate, discard and gurantee zeroing. */ + return file_zeroout (fd, offset, count); } return false; @@ -197,9 +220,9 @@ file_asynch_trim (struct rw *rw, struct command *command, static bool file_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { - if (!file_synch_zero (rw, command->offset, command->slice.len)) + if (!file_synch_zero (rw, command->offset, command->slice.len, allocate)) return false; errno = 0; if (cb.callback (cb.user_data, &errno) == -1) { diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c index 98b4056..8061787 100644 --- a/copy/multi-thread-copying.c +++ b/copy/multi-thread-copying.c @@ -485,11 +485,12 @@ finished_read (void *vp, int *error) * --destination-is-zero: * do nothing * - * --allocated: we must write zeroes either using an efficient + * --allocated: write zeroes allocating space using an efficient * zeroing command or writing a command of zeroes * - * (neither flag) try trimming if supported, else write zeroes - * as above + * (neither flag) write zeroes punching a hole using an efficient + * zeroing command or fallback to writing a command + * of zeroes. * * This takes over ownership of the command and frees it eventually. */ @@ -501,22 +502,13 @@ fill_dst_range_with_zeroes (struct command *command) if (destination_is_zero) goto free_and_return; - if (!allocated) { - /* Try trimming. */ - if (dst.ops->asynch_trim (&dst, command, - (nbd_completion_callback) { - .callback = free_command, - .user_data = command, - })) - return; - } - /* Try efficient zeroing. */ if (dst.ops->asynch_zero (&dst, command, (nbd_completion_callback) { .callback = free_command, .user_data = command, - })) + }, + allocated)) return; /* Fall back to loop writing zeroes. This is going to be slow diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c index 0bcf29b..f3a9744 100644 --- a/copy/nbd-ops.c +++ b/copy/nbd-ops.c @@ -97,13 +97,13 @@ nbd_ops_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) } static bool -nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate) { if (!rw->u.nbd.can_zero) return false; if (nbd_zero (rw->u.nbd.handles.ptr[0], - count, offset, LIBNBD_CMD_FLAG_NO_HOLE) == -1) { + count, offset, allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) { fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ()); exit (EXIT_FAILURE); } @@ -158,7 +158,7 @@ nbd_ops_asynch_trim (struct rw *rw, struct command *command, static bool nbd_ops_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { if (!rw->u.nbd.can_zero) return false; @@ -167,7 +167,7 @@ nbd_ops_asynch_zero (struct rw *rw, struct command *command, if (nbd_aio_zero (rw->u.nbd.handles.ptr[command->index], command->slice.len, command->offset, - cb, LIBNBD_CMD_FLAG_NO_HOLE) == -1) { + cb, allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) { fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ()); exit (EXIT_FAILURE); } diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h index 69fac2a..21d09bf 100644 --- a/copy/nbdcopy.h +++ b/copy/nbdcopy.h @@ -134,7 +134,8 @@ struct rw_ops { bool (*synch_trim) (struct rw *rw, uint64_t offset, uint64_t count); /* Synchronously zero. If not possible, returns false. */ - bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count); + bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count, + bool allocate); /* Asynchronous I/O operations. These start the operation and call * 'cb' on completion. @@ -162,7 +163,7 @@ struct rw_ops { * returns false. */ bool (*asynch_zero) (struct rw *rw, struct command *command, - nbd_completion_callback cb); + nbd_completion_callback cb, bool allocate); /* Number of asynchronous commands in flight for a particular thread. */ unsigned (*in_flight) (struct rw *rw, uintptr_t index); diff --git a/copy/null-ops.c b/copy/null-ops.c index b2ca66f..0a7b0fb 100644 --- a/copy/null-ops.c +++ b/copy/null-ops.c @@ -63,7 +63,7 @@ null_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) } static bool -null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate) { return true; } @@ -102,7 +102,7 @@ null_asynch_trim (struct rw *rw, struct command *command, static bool null_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { errno = 0; if (cb.callback (cb.user_data, &errno) == -1) { diff --git a/copy/synch-copying.c b/copy/synch-copying.c index 2712c10..37d2d7f 100644 --- a/copy/synch-copying.c +++ b/copy/synch-copying.c @@ -69,10 +69,10 @@ synch_copying (void) assert (exts.ptr[i].length <= count); if (exts.ptr[i].zero) { - if (!dst.ops->synch_trim (&dst, offset, exts.ptr[i].length) && - !dst.ops->synch_zero (&dst, offset, exts.ptr[i].length)) { - /* If neither trimming nor efficient zeroing are possible, - * write zeroes the hard way. + if (!dst.ops->synch_zero (&dst, offset, exts.ptr[i].length, false) && + !dst.ops->synch_zero (&dst, offset, exts.ptr[i].length, true)) { + /* If efficient zeroing (punching a hole or allocating space) are + * possible, write zeroes the hard way. */ memset (buf, 0, exts.ptr[i].length); dst.ops->synch_write (&dst, buf, exts.ptr[i].length, offset); -- 2.26.2
Richard W.M. Jones
2021-Feb-22 11:39 UTC
[Libguestfs] [PATCH libnbd 1/2] nbdcopy: Do not use trim for zeroing
The patch doesn't compile for me: pipe-ops.c:121:17: error: initialization of ?_Bool (*)(struct rw *, uint64_t, uint64_t, _Bool)? {aka ?_Bool (*)(struct rw *, long unsigned int, long unsigned int, _Bool)?} from incompatible pointer type ?_Bool (*)(struct rw *, uint64_t, uint64_t)? {aka ?_Bool (*)(struct rw *, long unsigned int, long unsigned int)?} [-Werror=incompatible-pointer-types] 121 | .synch_zero = pipe_synch_trim_zero, | ^~~~~~~~~~~~~~~~~~~~ pipe-ops.c:121:17: note: (near initialization for ?pipe_ops.synch_zero?) pipe-ops.c:133:18: error: initialization of ?_Bool (*)(struct rw *, struct command *, nbd_completion_callback, _Bool)? from incompatible pointer type ?_Bool (*)(struct rw *, struct command *, nbd_completion_callback)? [-Werror=incompatible-pointer-types] 133 | .asynch_zero = pipe_asynch_trim_zero, | ^~~~~~~~~~~~~~~~~~~~~ pipe-ops.c:133:18: note: (near initialization for ?pipe_ops.asynch_zero?) More comments below. On Sun, Feb 21, 2021 at 03:09:05AM +0200, Nir Soffer wrote:> diff --git a/copy/file-ops.c b/copy/file-ops.c > index 2a239d0..d0b9447 100644 > --- a/copy/file-ops.c > +++ b/copy/file-ops.c > @@ -100,10 +100,9 @@ file_synch_write (struct rw *rw, > } > > static bool > -file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) > +file_punch_hole(int fd, uint64_t offset, uint64_t count)(Style) missing space before '(' ...> { > #ifdef FALLOC_FL_PUNCH_HOLE > - int fd = rw->u.local.fd; > int r; > > r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > @@ -113,17 +112,14 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) > exit (EXIT_FAILURE); > } > return true; > -#else /* !FALLOC_FL_PUNCH_HOLE */ > - return false; > #endif > + return false; > } > > static bool > -file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) > +file_zero_range(int fd, uint64_t offset, uint64_t count)... and here> { > - if (S_ISREG (rw->u.local.stat.st_mode)) { > #ifdef FALLOC_FL_ZERO_RANGE > - int fd = rw->u.local.fd; > int r; > > r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count); > @@ -133,11 +129,13 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) > } > return true; > #endif > - } > - else if (S_ISBLK (rw->u.local.stat.st_mode) && > - IS_ALIGNED (offset | count, rw->u.local.sector_size)) { > + return false; > +} > + > +static bool > +file_zeroout(int fd, uint64_t offset, uint64_t count)... and here> +{ > #ifdef BLKZEROOUT > - int fd = rw->u.local.fd; > int r; > uint64_t range[2] = {offset, count}; > > @@ -148,6 +146,31 @@ file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) > } > return true; > #endif > + return false; > +} > + > +static bool > +file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) > +{ > + return file_punch_hole(rw->u.local.fd, offset, count); > +} > + > +static bool > +file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate)... and overlong line.> +{ > + int fd = rw->u.local.fd; > + > + if (S_ISREG (rw->u.local.stat.st_mode)) { > + if (allocate) { > + return file_zero_range (fd, offset, count); > + } else { > + return file_punch_hole (fd, offset, count); > + } > + } > + else if (S_ISBLK (rw->u.local.stat.st_mode) && > + IS_ALIGNED (offset | count, rw->u.local.sector_size)) { > + /* Always allocate, discard and gurantee zeroing. */ > + return file_zeroout (fd, offset, count); > } > return false;In the S_ISBLK && not aligned case, is giving up OK?> diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h > index 69fac2a..21d09bf 100644 > --- a/copy/nbdcopy.h > +++ b/copy/nbdcopy.h > @@ -134,7 +134,8 @@ struct rw_ops { > bool (*synch_trim) (struct rw *rw, uint64_t offset, uint64_t count); > > /* Synchronously zero. If not possible, returns false. */ > - bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count); > + bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count, > + bool allocate);After this change ops->synch_trim and ops->asynch_trim are no longer used, so I guess they should be removed completely from the code? Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-builder quickly builds VMs from scratch http://libguestfs.org/virt-builder.1.html