Nir Soffer
2021-Feb-22 19:34 UTC
[Libguestfs] [PATCH libnbd v2 2/3] copy: Do not use trim for zeroing
Current code uses NBD_CMD_TRIM for writing zeroes without allocating space (--allocate not specified). This incorrect and can lead to corrupting the target image. NBD manual warns about NBD_CMD_TRIM: After issuing this command, a client MUST NOT make any assumptions about the contents of the export affected by this command, until overwriting it again with NBD_CMD_WRITE or NBD_CMD_WRITE_ZEROES Add allocate flag to the zero operation, to allow zeroing by punching hole (default) or zeroing by allocating space. nbd-ops use NBD_CMD_WRITE_ZEROES with or without NBD_FLAG_NO_HOLE flag. file-ops use fallocate with PUNCH_HOLE or ZERO_RANGE flag. The copy-sparse tests was modified to require zero operation with may_trim flag. Signed-off-by: Nir Soffer <nsoffer at redhat.com> --- copy/copy-sparse.sh | 66 ++++++++++++++--------------- copy/file-ops.c | 82 +++++++++++++++++++++++-------------- copy/multi-thread-copying.c | 20 +++------ copy/nbd-ops.c | 11 ++--- copy/nbdcopy.h | 5 ++- copy/null-ops.c | 4 +- copy/synch-copying.c | 8 ++-- 7 files changed, 106 insertions(+), 90 deletions(-) diff --git a/copy/copy-sparse.sh b/copy/copy-sparse.sh index 846767b..c43b41a 100755 --- a/copy/copy-sparse.sh +++ b/copy/copy-sparse.sh @@ -33,7 +33,7 @@ cleanup_fn rm -f $out # Copy from a sparse data disk to an nbdkit-eval-plugin instance which # is logging everything. This allows us to see exactly what nbdcopy -# is writing, to ensure it is writing and trimming the target as +# is writing, to ensure it is writing and zeroing the target as # expected. $VG nbdcopy -S 0 -- \ [ nbdkit --exit-with-parent data data=' @@ -60,38 +60,38 @@ if [ "$(cat $out)" != "pwrite 1 4294967296 pwrite 32768 0 pwrite 32768 1073709056 pwrite 32768 4294934528 -trim 134184960 32768 -trim 134184960 4160749568 -trim 134184960 939524096 -trim 134217728 1073741824 -trim 134217728 1207959552 -trim 134217728 134217728 -trim 134217728 1342177280 -trim 134217728 1476395008 -trim 134217728 1610612736 -trim 134217728 1744830464 -trim 134217728 1879048192 -trim 134217728 2013265920 -trim 134217728 2147483648 -trim 134217728 2281701376 -trim 134217728 2415919104 -trim 134217728 2550136832 -trim 134217728 268435456 -trim 134217728 2684354560 -trim 134217728 2818572288 -trim 134217728 2952790016 -trim 134217728 3087007744 -trim 134217728 3221225472 -trim 134217728 3355443200 -trim 134217728 3489660928 -trim 134217728 3623878656 -trim 134217728 3758096384 -trim 134217728 3892314112 -trim 134217728 402653184 -trim 134217728 4026531840 -trim 134217728 536870912 -trim 134217728 671088640 -trim 134217728 805306368" ]; then +zero 134184960 32768 may_trim +zero 134184960 4160749568 may_trim +zero 134184960 939524096 may_trim +zero 134217728 1073741824 may_trim +zero 134217728 1207959552 may_trim +zero 134217728 1342177280 may_trim +zero 134217728 134217728 may_trim +zero 134217728 1476395008 may_trim +zero 134217728 1610612736 may_trim +zero 134217728 1744830464 may_trim +zero 134217728 1879048192 may_trim +zero 134217728 2013265920 may_trim +zero 134217728 2147483648 may_trim +zero 134217728 2281701376 may_trim +zero 134217728 2415919104 may_trim +zero 134217728 2550136832 may_trim +zero 134217728 2684354560 may_trim +zero 134217728 268435456 may_trim +zero 134217728 2818572288 may_trim +zero 134217728 2952790016 may_trim +zero 134217728 3087007744 may_trim +zero 134217728 3221225472 may_trim +zero 134217728 3355443200 may_trim +zero 134217728 3489660928 may_trim +zero 134217728 3623878656 may_trim +zero 134217728 3758096384 may_trim +zero 134217728 3892314112 may_trim +zero 134217728 4026531840 may_trim +zero 134217728 402653184 may_trim +zero 134217728 536870912 may_trim +zero 134217728 671088640 may_trim +zero 134217728 805306368 may_trim" ]; then echo "$0: output does not match expected" exit 1 fi diff --git a/copy/file-ops.c b/copy/file-ops.c index 6d098c2..d22273a 100644 --- a/copy/file-ops.c +++ b/copy/file-ops.c @@ -221,11 +221,9 @@ file_synch_write (struct rw *rw, } static bool -file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) +file_punch_hole(int fd, uint64_t offset, uint64_t count) { #ifdef FALLOC_FL_PUNCH_HOLE - struct rw_file *rwf = (struct rw_file *)rw; - int fd = rwf->fd; int r; r = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, @@ -235,42 +233,66 @@ file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) exit (EXIT_FAILURE); } return true; -#else /* !FALLOC_FL_PUNCH_HOLE */ - return false; #endif + return false; } static bool -file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +file_zero_range(int fd, uint64_t offset, uint64_t count) { - struct rw_file *rwf = (struct rw_file *)rw; - - if (! rwf->is_block) { #ifdef FALLOC_FL_ZERO_RANGE - int fd = rwf->fd; - int r; + int r; - r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count); - if (r == -1) { - perror ("fallocate: FALLOC_FL_ZERO_RANGE"); - exit (EXIT_FAILURE); - } - return true; -#endif + r = fallocate (fd, FALLOC_FL_ZERO_RANGE, offset, count); + if (r == -1) { + perror ("fallocate: FALLOC_FL_ZERO_RANGE"); + exit (EXIT_FAILURE); } - else if (rwf->is_block && IS_ALIGNED (offset | count, rwf->sector_size)) { + return true; +#endif + return false; +} + +static bool +file_zeroout(int fd, uint64_t offset, uint64_t count) +{ #ifdef BLKZEROOUT - int fd = rwf->fd; - int r; - uint64_t range[2] = {offset, count}; + int r; + uint64_t range[2] = {offset, count}; - r = ioctl (fd, BLKZEROOUT, &range); - if (r == -1) { - perror ("ioctl: BLKZEROOUT"); - exit (EXIT_FAILURE); - } - return true; + r = ioctl (fd, BLKZEROOUT, &range); + if (r == -1) { + perror ("ioctl: BLKZEROOUT"); + exit (EXIT_FAILURE); + } + return true; #endif + return false; +} + +static bool +file_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) +{ + struct rw_file *rwf = (struct rw_file *)rw; + + return file_punch_hole (rwf->fd, offset, count); +} + +static bool +file_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate) +{ + struct rw_file *rwf = (struct rw_file *)rw; + + if (!rwf->is_block) { + if (allocate) { + return file_zero_range (rwf->fd, offset, count); + } else { + return file_punch_hole (rwf->fd, offset, count); + } + } + else if (IS_ALIGNED (offset | count, rwf->sector_size)) { + /* Always allocate, discard and gurantee zeroing. */ + return file_zeroout (rwf->fd, offset, count); } return false; @@ -320,9 +342,9 @@ file_asynch_trim (struct rw *rw, struct command *command, static bool file_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { - if (!file_synch_zero (rw, command->offset, command->slice.len)) + if (!file_synch_zero (rw, command->offset, command->slice.len, allocate)) return false; errno = 0; if (cb.callback (cb.user_data, &errno) == -1) { diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c index 4f57054..768bca1 100644 --- a/copy/multi-thread-copying.c +++ b/copy/multi-thread-copying.c @@ -487,11 +487,12 @@ finished_read (void *vp, int *error) * --destination-is-zero: * do nothing * - * --allocated: we must write zeroes either using an efficient + * --allocated: write zeroes allocating space using an efficient * zeroing command or writing a command of zeroes * - * (neither flag) try trimming if supported, else write zeroes - * as above + * (neither flag) write zeroes punching a hole using an efficient + * zeroing command or fallback to writing a command + * of zeroes. * * This takes over ownership of the command and frees it eventually. */ @@ -503,22 +504,13 @@ fill_dst_range_with_zeroes (struct command *command) if (destination_is_zero) goto free_and_return; - if (!allocated) { - /* Try trimming. */ - if (dst->ops->asynch_trim (dst, command, - (nbd_completion_callback) { - .callback = free_command, - .user_data = command, - })) - return; - } - /* Try efficient zeroing. */ if (dst->ops->asynch_zero (dst, command, (nbd_completion_callback) { .callback = free_command, .user_data = command, - })) + }, + allocated)) return; /* Fall back to loop writing zeroes. This is going to be slow diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c index f001d02..388bd53 100644 --- a/copy/nbd-ops.c +++ b/copy/nbd-ops.c @@ -277,15 +277,16 @@ nbd_ops_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) } static bool -nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +nbd_ops_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, + bool allocate) { struct rw_nbd *rwn = (struct rw_nbd *) rw; if (!rwn->can_zero) return false; - if (nbd_zero (rwn->handles.ptr[0], - count, offset, LIBNBD_CMD_FLAG_NO_HOLE) == -1) { + if (nbd_zero (rwn->handles.ptr[0], count, offset, + allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) { fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ()); exit (EXIT_FAILURE); } @@ -346,7 +347,7 @@ nbd_ops_asynch_trim (struct rw *rw, struct command *command, static bool nbd_ops_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { struct rw_nbd *rwn = (struct rw_nbd *) rw; @@ -357,7 +358,7 @@ nbd_ops_asynch_zero (struct rw *rw, struct command *command, if (nbd_aio_zero (rwn->handles.ptr[command->index], command->slice.len, command->offset, - cb, LIBNBD_CMD_FLAG_NO_HOLE) == -1) { + cb, allocate ? LIBNBD_CMD_FLAG_NO_HOLE : 0) == -1) { fprintf (stderr, "%s: %s\n", rw->name, nbd_get_error ()); exit (EXIT_FAILURE); } diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h index 97e3b49..d553399 100644 --- a/copy/nbdcopy.h +++ b/copy/nbdcopy.h @@ -152,7 +152,8 @@ struct rw_ops { bool (*synch_trim) (struct rw *rw, uint64_t offset, uint64_t count); /* Synchronously zero. If not possible, returns false. */ - bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count); + bool (*synch_zero) (struct rw *rw, uint64_t offset, uint64_t count, + bool allocate); /* Asynchronous I/O operations. These start the operation and call * 'cb' on completion. @@ -180,7 +181,7 @@ struct rw_ops { * returns false. */ bool (*asynch_zero) (struct rw *rw, struct command *command, - nbd_completion_callback cb); + nbd_completion_callback cb, bool allocate); /* Number of asynchronous commands in flight for a particular thread. */ unsigned (*in_flight) (struct rw *rw, uintptr_t index); diff --git a/copy/null-ops.c b/copy/null-ops.c index b75ca1f..aed2a03 100644 --- a/copy/null-ops.c +++ b/copy/null-ops.c @@ -105,7 +105,7 @@ null_synch_trim (struct rw *rw, uint64_t offset, uint64_t count) } static bool -null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count) +null_synch_zero (struct rw *rw, uint64_t offset, uint64_t count, bool allocate) { return true; } @@ -144,7 +144,7 @@ null_asynch_trim (struct rw *rw, struct command *command, static bool null_asynch_zero (struct rw *rw, struct command *command, - nbd_completion_callback cb) + nbd_completion_callback cb, bool allocate) { errno = 0; if (cb.callback (cb.user_data, &errno) == -1) { diff --git a/copy/synch-copying.c b/copy/synch-copying.c index 985f005..17bda16 100644 --- a/copy/synch-copying.c +++ b/copy/synch-copying.c @@ -69,10 +69,10 @@ synch_copying (void) assert (exts.ptr[i].length <= count); if (exts.ptr[i].zero) { - if (!dst->ops->synch_trim (dst, offset, exts.ptr[i].length) && - !dst->ops->synch_zero (dst, offset, exts.ptr[i].length)) { - /* If neither trimming nor efficient zeroing are possible, - * write zeroes the hard way. + if (!dst->ops->synch_zero (dst, offset, exts.ptr[i].length, false) && + !dst->ops->synch_zero (dst, offset, exts.ptr[i].length, true)) { + /* If efficient zeroing (punching a hole or allocating + * space) are possible, write zeroes the hard way. */ memset (buf, 0, exts.ptr[i].length); dst->ops->synch_write (dst, buf, exts.ptr[i].length, offset); -- 2.26.2
Richard W.M. Jones
2021-Feb-22 19:44 UTC
[Libguestfs] [PATCH libnbd v2 2/3] copy: Do not use trim for zeroing
This one still fails with: gcc -DHAVE_CONFIG_H -I. -I.. -I../include -I../common/include -I../common/utils -Wall -Wvla -Werror -pthread -O2 -flto=auto -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -O3 -mavx2 -MT nbdcopy-pipe-ops.o -MD -MP -MF .deps/nbdcopy-pipe-ops.Tpo -c -o nbdcopy-pipe-ops.o `test -f 'pipe-ops.c' || echo './'`pipe-ops.c pipe-ops.c:177:17: error: initialization of ?_Bool (*)(struct rw *, uint64_t, uint64_t, _Bool)? {aka ?_Bool (*)(struct rw *, long unsigned int, long unsigned int, _Bool)?} from incompatible pointer type ?_Bool (*)(struct rw *, uint64_t, uint64_t)? {aka ?_Bool (*)(struct rw *, long unsigned int, long unsigned int)?} [-Werror=incompatible-pointer-types] 177 | .synch_zero = pipe_synch_trim_zero, | ^~~~~~~~~~~~~~~~~~~~ pipe-ops.c:177:17: note: (near initialization for ?pipe_ops.synch_zero?) pipe-ops.c:189:18: error: initialization of ?_Bool (*)(struct rw *, struct command *, nbd_completion_callback, _Bool)? from incompatible pointer type ?_Bool (*)(struct rw *, struct command *, nbd_completion_callback)? [-Werror=incompatible-pointer-types] 189 | .asynch_zero = pipe_asynch_trim_zero, | ^~~~~~~~~~~~~~~~~~~~~ pipe-ops.c:189:18: note: (near initialization for ?pipe_ops.asynch_zero?) cc1: all warnings being treated as errors Did you post the wrong version of this patch? I notice it still has the *_trim methods. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into KVM guests. http://libguestfs.org/virt-v2v