Richard W.M. Jones
2021-Jul-26 17:28 UTC
[Libguestfs] [PATCH nbdkit 4/7] cache, cow: Use full pread/pwrite operations
Although it probably cannot happen on Linux, POSIX allows pread/pwrite
to return or write fewer bytes than requested. The cache and cow
filters didn't handle this situation. Replace the raw
pread(2)/pwrite(2) syscalls with alternate versions which can handle
this.
---
common/utils/Makefile.am | 1 +
common/utils/utils.h | 2 +
common/utils/full-rw.c | 81 ++++++++++++++++++++++++++++++++++++++++
filters/cache/blk.c | 10 ++---
filters/cow/blk.c | 6 +--
5 files changed, 92 insertions(+), 8 deletions(-)
diff --git a/common/utils/Makefile.am b/common/utils/Makefile.am
index 1708a4c8..14e9dfc4 100644
--- a/common/utils/Makefile.am
+++ b/common/utils/Makefile.am
@@ -40,6 +40,7 @@ libutils_la_SOURCES = \
cleanup-nbdkit.c \
cleanup.h \
environ.c \
+ full-rw.c \
quote.c \
utils.c \
utils.h \
diff --git a/common/utils/utils.h b/common/utils/utils.h
index f8f70212..83397ae1 100644
--- a/common/utils/utils.h
+++ b/common/utils/utils.h
@@ -40,5 +40,7 @@ extern int set_cloexec (int fd);
extern int set_nonblock (int fd);
extern char **copy_environ (char **env, ...) __attribute__((__sentinel__));
extern char *make_temporary_directory (void);
+extern ssize_t full_pread (int fd, void *buf, size_t count, off_t offset);
+extern ssize_t full_pwrite (int fd, const void *buf, size_t count, off_t
offset);
#endif /* NBDKIT_UTILS_H */
diff --git a/common/utils/full-rw.c b/common/utils/full-rw.c
new file mode 100644
index 00000000..55b32cdd
--- /dev/null
+++ b/common/utils/full-rw.c
@@ -0,0 +1,81 @@
+/* nbdkit
+ * Copyright (C) 2021 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS
IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* These functions are like pread(2)/pwrite(2) but they always read or
+ * write the full amount, or fail.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+ssize_t
+full_pread (int fd, void *buf, size_t count, off_t offset)
+{
+ ssize_t ret = 0, r;
+
+ while (count > 0) {
+ r = pread (fd, buf, count, offset);
+ if (r == -1) return -1;
+ if (r == 0) {
+ /* Presumably the caller wasn't expecting end-of-file here, so
+ * return an error.
+ */
+ errno = EIO;
+ return -1;
+ }
+ ret += r;
+ offset += r;
+ count -= r;
+ }
+
+ return ret;
+}
+
+ssize_t
+full_pwrite (int fd, const void *buf, size_t count, off_t offset)
+{
+ ssize_t ret = 0, r;
+
+ while (count > 0) {
+ r = pwrite (fd, buf, count, offset);
+ if (r == -1) return -1;
+ ret += r;
+ offset += r;
+ count -= r;
+ }
+
+ return ret;
+}
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index f85ada35..42bd3779 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -250,7 +250,7 @@ _blk_read_multiple (nbdkit_next *next,
" (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -262,7 +262,7 @@ _blk_read_multiple (nbdkit_next *next,
}
}
else { /* Read cache. */
- if (pread (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
@@ -339,7 +339,7 @@ blk_cache (nbdkit_next *next,
nbdkit_debug ("cache: cache block %" PRIu64 " (offset
%" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -380,7 +380,7 @@ blk_writethrough (nbdkit_next *next,
nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset
%" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -414,7 +414,7 @@ blk_write (nbdkit_next *next,
nbdkit_debug ("cache: writeback block %" PRIu64 " (offset
%" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 9e6c8879..cebd9454 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -282,7 +282,7 @@ blk_read_multiple (nbdkit_next *next,
memset (block + n, 0, tail);
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
@@ -357,7 +357,7 @@ blk_cache (nbdkit_next *next,
memset (block + n, 0, tail);
if (mode == BLK_CACHE_COW) {
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -376,7 +376,7 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset
%" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
--
2.32.0
Eric Blake
2021-Jul-26 20:51 UTC
[Libguestfs] [PATCH nbdkit 4/7] cache, cow: Use full pread/pwrite operations
On Mon, Jul 26, 2021 at 06:28:57PM +0100, Richard W.M. Jones wrote:> Although it probably cannot happen on Linux, POSIX allows pread/pwrite > to return or write fewer bytes than requested. The cache and cow > filters didn't handle this situation. Replace the raw > pread(2)/pwrite(2) syscalls with alternate versions which can handle > this.Short reads and writes are unlikely to happen for regular files, but are a definite possibility on named FIFOs, as well as block and char devices. While our local cache happens to use a regular file, it is indeed always better practice to write robust code.> --- > common/utils/Makefile.am | 1 + > common/utils/utils.h | 2 + > common/utils/full-rw.c | 81 ++++++++++++++++++++++++++++++++++++++++ > filters/cache/blk.c | 10 ++--- > filters/cow/blk.c | 6 +-- > 5 files changed, 92 insertions(+), 8 deletions(-) >-- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Martin Kletzander
2021-Aug-04 08:45 UTC
[Libguestfs] [PATCH nbdkit 4/7] cache, cow: Use full pread/pwrite operations
On Mon, Jul 26, 2021 at 06:28:57PM +0100, Richard W.M. Jones wrote:>Although it probably cannot happen on Linux, POSIX allows pread/pwrite >to return or write fewer bytes than requested. The cache and cow >filters didn't handle this situation. Replace the raw >pread(2)/pwrite(2) syscalls with alternate versions which can handle >this. >--- > common/utils/Makefile.am | 1 + > common/utils/utils.h | 2 + > common/utils/full-rw.c | 81 ++++++++++++++++++++++++++++++++++++++++ > filters/cache/blk.c | 10 ++--- > filters/cow/blk.c | 6 +-- > 5 files changed, 92 insertions(+), 8 deletions(-) > >diff --git a/common/utils/Makefile.am b/common/utils/Makefile.am >index 1708a4c8..14e9dfc4 100644 >--- a/common/utils/Makefile.am >+++ b/common/utils/Makefile.am >@@ -40,6 +40,7 @@ libutils_la_SOURCES = \ > cleanup-nbdkit.c \ > cleanup.h \ > environ.c \ >+ full-rw.c \ > quote.c \ > utils.c \ > utils.h \ >diff --git a/common/utils/utils.h b/common/utils/utils.h >index f8f70212..83397ae1 100644 >--- a/common/utils/utils.h >+++ b/common/utils/utils.h >@@ -40,5 +40,7 @@ extern int set_cloexec (int fd); > extern int set_nonblock (int fd); > extern char **copy_environ (char **env, ...) __attribute__((__sentinel__)); > extern char *make_temporary_directory (void); >+extern ssize_t full_pread (int fd, void *buf, size_t count, off_t offset); >+extern ssize_t full_pwrite (int fd, const void *buf, size_t count, off_t offset); > > #endif /* NBDKIT_UTILS_H */ >diff --git a/common/utils/full-rw.c b/common/utils/full-rw.c >new file mode 100644 >index 00000000..55b32cdd >--- /dev/null >+++ b/common/utils/full-rw.c >@@ -0,0 +1,81 @@ >+/* nbdkit >+ * Copyright (C) 2021 Red Hat Inc. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions are >+ * met: >+ * >+ * * Redistributions of source code must retain the above copyright >+ * notice, this list of conditions and the following disclaimer. >+ * >+ * * Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * >+ * * Neither the name of Red Hat nor the names of its contributors may be >+ * used to endorse or promote products derived from this software without >+ * specific prior written permission. >+ * >+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND >+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, >+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A >+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR >+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, >+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT >+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF >+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND >+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, >+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT >+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >+ * SUCH DAMAGE. >+ */ >+ >+/* These functions are like pread(2)/pwrite(2) but they always read or >+ * write the full amount, or fail. >+ */ >+ >+#include <config.h> >+ >+#include <stdio.h> >+#include <stdlib.h> >+#include <unistd.h> >+#include <errno.h> >+ >+ssize_t >+full_pread (int fd, void *buf, size_t count, off_t offset) >+{ >+ ssize_t ret = 0, r; >+ >+ while (count > 0) { >+ r = pread (fd, buf, count, offset); >+ if (r == -1) return -1; >+ if (r == 0) { >+ /* Presumably the caller wasn't expecting end-of-file here, so >+ * return an error. >+ */ >+ errno = EIO; >+ return -1; >+ } >+ ret += r; >+ offset += r; >+ count -= r; >+ } >+ >+ return ret; >+} >+ >+ssize_t >+full_pwrite (int fd, const void *buf, size_t count, off_t offset) >+{ >+ ssize_t ret = 0, r; >+ >+ while (count > 0) { >+ r = pwrite (fd, buf, count, offset); >+ if (r == -1) return -1; >+ ret += r; >+ offset += r; >+ count -= r; >+ } >+Shouldn't these continue on EINTR? -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: not available URL: <http://listman.redhat.com/archives/libguestfs/attachments/20210804/b9beb6cb/attachment.sig>