Richard W.M. Jones
2021-Feb-17 12:57 UTC
[Libguestfs] [PATCH nbdkit] cache, cow: Do not round size down.
Both the cache and cow filters have the annoying behaviour that the size of the underlying plugin is rounded down to the block size used by the filters (ie. 4K). This is especially visible for single sector disk images: $ ./nbdkit memory size=512 --filter=cow --run 'nbdinfo --size $uri' 0 $ ./nbdkit memory size=512 --filter=cache --run 'nbdinfo --size $uri' 0 but affects all users of these filters somewhat randomly and unexpectedly. (For example I had a GPT partitioned disk image from a customer which did not have a 4K aligned size, which was unexpectedly "corrupted" when I tried to open it using the cow filter - the reason for the corruption was the backup partition table at the end of the disk being truncated.) Getting rid of this assumption is awkward: the general idea is that we round up the size of the backing file / bitmap by a full block. But whenever we are asked to read or write to the underlying plugin we handle the tail case carefully. This also tests various corner cases. --- filters/cache/nbdkit-cache-filter.pod | 9 +--- filters/cow/nbdkit-cow-filter.pod | 11 +---- tests/Makefile.am | 4 ++ filters/cache/blk.c | 54 +++++++++++++++++++--- filters/cache/cache.c | 12 ++--- filters/cow/blk.c | 49 +++++++++++++++++--- filters/cow/cow.c | 23 +++++++--- tests/test-cache-unaligned.sh | 66 +++++++++++++++++++++++++++ tests/test-cow-unaligned.sh | 56 +++++++++++++++++++++++ 9 files changed, 239 insertions(+), 45 deletions(-) diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod index 3601dcd5..34fd0b29 100644 --- a/filters/cache/nbdkit-cache-filter.pod +++ b/filters/cache/nbdkit-cache-filter.pod @@ -25,12 +25,6 @@ does not have effective caching, or (with C<cache=unsafe>) to defeat flush requests from the client (which is unsafe and can cause data loss, as the name suggests). -Note that the use of this filter rounds the image size down to a -multiple of the caching granularity (the larger of 4096 or the -C<f_bsize> field of L<fstatvfs(3)>), to ease the implementation. If -you need to round the image size up instead to access the last few -bytes, combine this filter with L<nbdkit-truncate-filter(1)>. - This filter only caches image contents. To cache image metadata, use L<nbdkit-cacheextents-filter(1)> between this filter and the plugin. To accelerate sequential reads, use L<nbdkit-readahead-filter(1)> @@ -168,7 +162,6 @@ L<nbdkit(1)>, L<nbdkit-file-plugin(1)>, L<nbdkit-cacheextents-filter(1)>, L<nbdkit-readahead-filter(1)>, -L<nbdkit-truncate-filter(1)>, L<nbdkit-filter(3)>, L<qemu-img(1)>. @@ -180,4 +173,4 @@ Richard W.M. Jones =head1 COPYRIGHT -Copyright (C) 2018-2020 Red Hat Inc. +Copyright (C) 2018-2021 Red Hat Inc. diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod index 4d5ae856..64df3fbd 100644 --- a/filters/cow/nbdkit-cow-filter.pod +++ b/filters/cow/nbdkit-cow-filter.pod @@ -31,14 +31,6 @@ that away. It also allows us to create diffs, see below. The plugin is opened read-only (as if the I<-r> flag was passed), but you should B<not> pass the I<-r> flag to nbdkit. -=item * - -Note that the use of this filter rounds the image size down to a -multiple of the caching granularity (4096), to ease the -implementation. If you need to round the image size up instead to -access the last few bytes, combine this filter with -L<nbdkit-truncate-filter(1)>. - =back Limitations of the filter include: @@ -140,7 +132,6 @@ C<nbdkit-cow-filter> first appeared in nbdkit 1.2. L<nbdkit(1)>, L<nbdkit-file-plugin(1)>, -L<nbdkit-truncate-filter(1)>, L<nbdkit-xz-filter(1)>, L<nbdkit-filter(3)>, L<qemu-img(1)>. @@ -153,4 +144,4 @@ Richard W.M. Jones =head1 COPYRIGHT -Copyright (C) 2018 Red Hat Inc. +Copyright (C) 2018-2021 Red Hat Inc. diff --git a/tests/Makefile.am b/tests/Makefile.am index 42e842b2..70898f20 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1353,11 +1353,13 @@ TESTS += \ test-cache.sh \ test-cache-on-read.sh \ test-cache-max-size.sh \ + test-cache-unaligned.sh \ $(NULL) EXTRA_DIST += \ test-cache.sh \ test-cache-on-read.sh \ test-cache-max-size.sh \ + test-cache-unaligned.sh \ $(NULL) # cacheextents filter test. @@ -1380,6 +1382,7 @@ TESTS += \ test-cow.sh \ test-cow-extents1.sh \ test-cow-extents2.sh \ + test-cow-unaligned.sh \ $(NULL) endif TESTS += test-cow-null.sh @@ -1388,6 +1391,7 @@ EXTRA_DIST += \ test-cow-extents1.sh \ test-cow-extents2.sh \ test-cow-null.sh \ + test-cow-unaligned.sh \ $(NULL) # delay filter tests. diff --git a/filters/cache/blk.c b/filters/cache/blk.c index dc1bcc57..b9af6909 100644 --- a/filters/cache/blk.c +++ b/filters/cache/blk.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2018-2020 Red Hat Inc. + * Copyright (C) 2018-2021 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -54,6 +54,7 @@ #include "bitmap.h" #include "minmax.h" +#include "rounding.h" #include "utils.h" #include "cache.h" @@ -165,18 +166,25 @@ blk_free (void) lru_free (); } +/* Because blk_set_size is called before the other blk_* functions + * this should be set to the true size before we need it. + */ +static uint64_t size = 0; + int blk_set_size (uint64_t new_size) { - if (bitmap_resize (&bm, new_size) == -1) + size = new_size; + + if (bitmap_resize (&bm, size) == -1) return -1; - if (ftruncate (fd, new_size) == -1) { + if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) { nbdkit_error ("ftruncate: %m"); return -1; } - if (lru_set_size (new_size) == -1) + if (lru_set_size (size) == -1) return -1; return 0; @@ -199,9 +207,22 @@ blk_read (struct nbdkit_next_ops *next_ops, void *nxdata, "unknown"); if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */ - if (next_ops->pread (nxdata, block, blksize, offset, 0, err) == -1) + unsigned n = blksize, tail = 0; + + if (offset + n > size) { + tail = offset + n - size; + n -= tail; + } + + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) return -1; + /* Normally we're reading whole blocks, but at the very end of the + * file we might read a partial block. Deal with that case by + * zeroing the tail. + */ + memset (block + n, 0, tail); + /* If cache-on-read, copy the block to the cache. */ if (cache_on_read) { nbdkit_debug ("cache: cache-on-read block %" PRIu64 @@ -247,9 +268,22 @@ blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata, if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin, copy to cache regardless of cache-on-read. */ - if (next_ops->pread (nxdata, block, blksize, offset, 0, err) == -1) + unsigned n = blksize, tail = 0; + + if (offset + n > size) { + tail = offset + n - size; + n -= tail; + } + + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) return -1; + /* Normally we're reading whole blocks, but at the very end of the + * file we might read a partial block. Deal with that case by + * zeroing the tail. + */ + memset (block + n, 0, tail); + nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")", blknum, (uint64_t) offset); @@ -281,6 +315,12 @@ blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata, int *err) { off_t offset = blknum * blksize; + unsigned n = blksize, tail = 0; + + if (offset + n > size) { + tail = offset + n - size; + n -= tail; + } reclaim (fd, &bm); @@ -293,7 +333,7 @@ blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata, return -1; } - if (next_ops->pwrite (nxdata, block, blksize, offset, flags, err) == -1) + if (next_ops->pwrite (nxdata, block, n, offset, flags, err) == -1) return -1; bitmap_set_blk (&bm, blknum, BLOCK_CLEAN); diff --git a/filters/cache/cache.c b/filters/cache/cache.c index 91dcc43d..b979f256 100644 --- a/filters/cache/cache.c +++ b/filters/cache/cache.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2018-2020 Red Hat Inc. + * Copyright (C) 2018-2021 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -201,9 +201,7 @@ cache_config_complete (nbdkit_next_config_complete *next, void *nxdata) return next (nxdata); } -/* Get the file size; round it down to cache granularity before - * setting cache size. - */ +/* Get the file size, set the cache size. */ static int64_t cache_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle) @@ -216,7 +214,6 @@ cache_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, return -1; nbdkit_debug ("cache: underlying file size: %" PRIi64, size); - size = ROUND_DOWN (size, blksize); ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); r = blk_set_size (size); @@ -226,8 +223,9 @@ cache_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, return size; } -/* Force an early call to cache_get_size, consequently truncating the - * cache to the correct size. +/* Force an early call to cache_get_size because we have to set the + * backing file size and bitmap size before any other read or write + * calls. */ static int cache_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, diff --git a/filters/cow/blk.c b/filters/cow/blk.c index ae0db1fe..51ba91c4 100644 --- a/filters/cow/blk.c +++ b/filters/cow/blk.c @@ -94,6 +94,7 @@ #include "bitmap.h" #include "fdatasync.h" +#include "rounding.h" #include "pread.h" #include "pwrite.h" #include "utils.h" @@ -176,14 +177,21 @@ blk_free (void) bitmap_free (&bm); } +/* Because blk_set_size is called before the other blk_* functions + * this should be set to the true size before we need it. + */ +static uint64_t size = 0; + /* Allocate or resize the overlay file and bitmap. */ int blk_set_size (uint64_t new_size) { - if (bitmap_resize (&bm, new_size) == -1) + size = new_size; + + if (bitmap_resize (&bm, size) == -1) return -1; - if (ftruncate (fd, new_size) == -1) { + if (ftruncate (fd, ROUND_UP (size, BLKSIZE)) == -1) { nbdkit_error ("ftruncate: %m"); return -1; } @@ -216,8 +224,24 @@ blk_read (struct nbdkit_next_ops *next_ops, void *nxdata, nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s", blknum, (uint64_t) offset, state_to_string (state)); - if (state == BLOCK_NOT_ALLOCATED) /* Read underlying plugin. */ - return next_ops->pread (nxdata, block, BLKSIZE, offset, 0, err); + if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */ + unsigned n = BLKSIZE, tail = 0; + + if (offset + n > size) { + tail = offset + n - size; + n -= tail; + } + + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) + return -1; + + /* Normally we're reading whole blocks, but at the very end of the + * file we might read a partial block. Deal with that case by + * zeroing the tail. + */ + memset (block + n, 0, tail); + return 0; + } else if (state == BLOCK_ALLOCATED) { /* Read overlay. */ if (pread (fd, block, BLKSIZE, offset) == -1) { *err = errno; @@ -238,6 +262,12 @@ blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata, { off_t offset = blknum * BLKSIZE; enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED); + unsigned n = BLKSIZE, tail = 0; + + if (offset + n > size) { + tail = offset + n - size; + n -= tail; + } nbdkit_debug ("cow: blk_cache block %" PRIu64 " (offset %" PRIu64 ") is %s", blknum, (uint64_t) offset, state_to_string (state)); @@ -258,9 +288,16 @@ blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata, if (mode == BLK_CACHE_IGNORE) return 0; if (mode == BLK_CACHE_PASSTHROUGH) - return next_ops->cache (nxdata, BLKSIZE, offset, 0, err); - if (next_ops->pread (nxdata, block, BLKSIZE, offset, 0, err) == -1) + return next_ops->cache (nxdata, n, offset, 0, err); + + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) return -1; + /* Normally we're reading whole blocks, but at the very end of the + * file we might read a partial block. Deal with that case by + * zeroing the tail. + */ + memset (block + n, 0, tail); + if (mode == BLK_CACHE_COW) { if (pwrite (fd, block, BLKSIZE, offset) == -1) { *err = errno; diff --git a/filters/cow/cow.c b/filters/cow/cow.c index 75d3b907..93e10f24 100644 --- a/filters/cow/cow.c +++ b/filters/cow/cow.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2018-2020 Red Hat Inc. + * Copyright (C) 2018-2021 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -103,9 +103,7 @@ cow_open (nbdkit_next_open *next, void *nxdata, return NBDKIT_HANDLE_NOT_NEEDED; } -/* Get the file size; round it down to overlay granularity before - * setting overlay size. - */ +/* Get the file size, set the cache size. */ static int64_t cow_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle) @@ -118,7 +116,6 @@ cow_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, return -1; nbdkit_debug ("cow: underlying file size: %" PRIi64, size); - size = ROUND_DOWN (size, BLKSIZE); ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); r = blk_set_size (size); @@ -128,8 +125,9 @@ cow_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, return size; } -/* Force an early call to cow_get_size, consequently truncating the - * overlay to the correct size. +/* Force an early call to cow_get_size because we have to set the + * backing file size and bitmap size before any other read or write + * calls. */ static int cow_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, @@ -627,6 +625,7 @@ cow_extents (struct nbdkit_next_ops *next_ops, void *nxdata, uint64_t range_offset = offset; uint32_t range_count = 0; size_t i; + int64_t size; /* Asking the plugin for a single block of extents is not * efficient for some plugins (eg. VDDK) so ask for as much data @@ -643,6 +642,16 @@ cow_extents (struct nbdkit_next_ops *next_ops, void *nxdata, if (present) break; } + /* Don't ask for extent data beyond the end of the plugin. */ + size = next_ops->get_size (nxdata); + if (size == -1) + return -1; + + if (range_offset + range_count > size) { + unsigned tail = range_offset + range_count - size; + range_count -= tail; + } + CLEANUP_EXTENTS_FREE struct nbdkit_extents *extents2 nbdkit_extents_full (next_ops, nxdata, range_count, range_offset, flags, err); diff --git a/tests/test-cache-unaligned.sh b/tests/test-cache-unaligned.sh new file mode 100755 index 00000000..80b3d128 --- /dev/null +++ b/tests/test-cache-unaligned.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# nbdkit +# Copyright (C) 2018-2021 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +source ./functions.sh +set -e +set -x + +requires_filter cache +requires_nbdsh_uri + +sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +files="cache-unaligned.img $sock cache-unaligned.pid" +rm -f $files +cleanup_fn rm -f $files + +# Create an empty base image which is not a multiple of 4K. +truncate -s 130000 cache-unaligned.img + +# Run nbdkit with the caching filter. +start_nbdkit -P cache-unaligned.pid -U $sock --filter=cache \ + file cache-unaligned.img + +nbdsh --connect "nbd+unix://?socket=$sock" \ + -c ' +# Write some pattern data to the overlay and check it reads back OK. +buf = b"abcdefghijklm" * 10000 +h.pwrite(buf, 0) +buf2 = h.pread(130000, 0) +assert buf == buf2 + +# Flushing should write through to the underlying file. +h.flush() + +with open("cache-unaligned.img", "rb") as file: + buf2 = file.read(130000) + assert buf == buf2 +' diff --git a/tests/test-cow-unaligned.sh b/tests/test-cow-unaligned.sh new file mode 100755 index 00000000..d89b84b9 --- /dev/null +++ b/tests/test-cow-unaligned.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# nbdkit +# Copyright (C) 2018-2021 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +source ./functions.sh +set -e +set -x + +requires_filter cow +requires_nbdsh_uri + +sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +files="$sock cow-unaligned.pid" +rm -f $files +cleanup_fn rm -f $files + +# Run nbdkit with the cow filter and a size which is not a multiple of +# the cow filter block size (4K). +start_nbdkit -P cow-unaligned.pid -U $sock --filter=cow memory 130000 + +nbdsh --connect "nbd+unix://?socket=$sock" \ + -c ' +# Write some pattern data to the overlay and check it reads back OK. +buf = b"abcdefghijklm" * 10000 +h.pwrite(buf, 0) +buf2 = h.pread(130000, 0) +assert buf == buf2 +' -- 2.29.0.rc2
Eric Blake
2021-Feb-17 14:57 UTC
[Libguestfs] [PATCH nbdkit] cache, cow: Do not round size down.
On 2/17/21 6:57 AM, Richard W.M. Jones wrote:> Both the cache and cow filters have the annoying behaviour that the > size of the underlying plugin is rounded down to the block size used > by the filters (ie. 4K). This is especially visible for single sector > disk images: > > $ ./nbdkit memory size=512 --filter=cow --run 'nbdinfo --size $uri' > 0 > $ ./nbdkit memory size=512 --filter=cache --run 'nbdinfo --size $uri' > 0 > > but affects all users of these filters somewhat randomly and > unexpectedly. (For example I had a GPT partitioned disk image from a > customer which did not have a 4K aligned size, which was unexpectedly > "corrupted" when I tried to open it using the cow filter - the reason > for the corruption was the backup partition table at the end of the > disk being truncated.) > > Getting rid of this assumption is awkward: the general idea is that we > round up the size of the backing file / bitmap by a full block. But > whenever we are asked to read or write to the underlying plugin we > handle the tail case carefully. > > This also tests various corner cases. > ---> +++ b/filters/cache/nbdkit-cache-filter.pod > @@ -25,12 +25,6 @@ does not have effective caching, or (with C<cache=unsafe>) to defeat > flush requests from the client (which is unsafe and can cause data > loss, as the name suggests). > > -Note that the use of this filter rounds the image size down to a > -multiple of the caching granularity (the larger of 4096 or the > -C<f_bsize> field of L<fstatvfs(3)>), to ease the implementation. If > -you need to round the image size up instead to access the last few > -bytes, combine this filter with L<nbdkit-truncate-filter(1)>.Nice to see this restriction go. Do you also want to tweak the sentence in nbdkit-truncate-filter.pod that mentions cache and cow filters?> +++ b/filters/cache/blk.c> int > blk_set_size (uint64_t new_size) > { > - if (bitmap_resize (&bm, new_size) == -1) > + size = new_size; > + > + if (bitmap_resize (&bm, size) == -1) > return -1; > > - if (ftruncate (fd, new_size) == -1) { > + if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) {So the underlying fd is aligned...> nbdkit_error ("ftruncate: %m"); > return -1; > } > > - if (lru_set_size (new_size) == -1) > + if (lru_set_size (size) == -1)...while the lru size is still the original size.> @@ -199,9 +207,22 @@ blk_read (struct nbdkit_next_ops *next_ops, void *nxdata, > "unknown"); > > if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */ > - if (next_ops->pread (nxdata, block, blksize, offset, 0, err) == -1) > + unsigned n = blksize, tail = 0; > + > + if (offset + n > size) { > + tail = offset + n - size; > + n -= tail; > + } > + > + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) > return -1;Good, you were careful to never read beyond EOF of the underlying plugin.> > + /* Normally we're reading whole blocks, but at the very end of the > + * file we might read a partial block. Deal with that case by > + * zeroing the tail. > + */ > + memset (block + n, 0, tail); > +I don't know if that tail can ever leak out to the client, but agree that it is safer to zero it anyway.> /* If cache-on-read, copy the block to the cache. */ > if (cache_on_read) { > nbdkit_debug ("cache: cache-on-read block %" PRIu64 > @@ -247,9 +268,22 @@ blk_cache (struct nbdkit_next_ops *next_ops, void *nxdata, > > if (state == BLOCK_NOT_CACHED) { > /* Read underlying plugin, copy to cache regardless of cache-on-read. */ > - if (next_ops->pread (nxdata, block, blksize, offset, 0, err) == -1) > + unsigned n = blksize, tail = 0; > + > + if (offset + n > size) { > + tail = offset + n - size; > + n -= tail; > + } > + > + if (next_ops->pread (nxdata, block, n, offset, 0, err) == -1) > return -1; > > + /* Normally we're reading whole blocks, but at the very end of the > + * file we might read a partial block. Deal with that case by > + * zeroing the tail. > + */ > + memset (block + n, 0, tail); > +Duplicated code, but I'm not sure if it warrants a helper function.> nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")", > blknum, (uint64_t) offset); > > @@ -281,6 +315,12 @@ blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata, > int *err) > { > off_t offset = blknum * blksize; > + unsigned n = blksize, tail = 0; > + > + if (offset + n > size) { > + tail = offset + n - size; > + n -= tail; > + } > > reclaim (fd, &bm); > > @@ -293,7 +333,7 @@ blk_writethrough (struct nbdkit_next_ops *next_ops, void *nxdata, > return -1; > } > > - if (next_ops->pwrite (nxdata, block, blksize, offset, flags, err) == -1) > + if (next_ops->pwrite (nxdata, block, n, offset, flags, err) == -1) > return -1;Okay, you're careful to not write beyond EOF of the plugin, regardless of what extra may live in the tail in the local fd. LGTM -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org