Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 00/13] Add filters + FUA support to nbdkit
A combination of the work that both Rich and I have been doing lately, where filters use only the new API with flags on every command that the client can send over the wire (we can then add support for more flags in nbdkit without having to add new callbacks, as NBD adds more flags upstream). Eric Blake (4): protocol: Split flags from cmd field in requests backend: Pass flags argument through plugins: Move FUA fallback to plugins RFC: plugins: Add callbacks for FUA semantics Richard W.M. Jones (9): plugins: Move locking to a new file. Refactor plugin_* functions into a backend struct. Fix const-correctness of backend pwrite method. backend: Add a .plugin_name method. connections: Allow multiple handles to be stored in the connection object. Introduce filters. Implement filters. filters: Add nbdkit-offset-filter. filters: Move rdelay/wdelay from file plugin to new delay filter. Makefile.am | 2 +- TODO | 22 +- configure.ac | 5 +- docs/Makefile.am | 9 +- docs/nbdkit-filter.pod | 528 +++++++++++++++++++++++++ docs/nbdkit-plugin.pod | 92 ++++- docs/nbdkit.pod | 31 +- filters/Makefile.am | 35 ++ filters/delay/Makefile.am | 62 +++ filters/delay/delay.c | 162 ++++++++ filters/delay/nbdkit-delay-filter.pod | 88 +++++ filters/offset/Makefile.am | 62 +++ filters/offset/nbdkit-offset-filter.pod | 99 +++++ filters/offset/offset.c | 148 +++++++ include/Makefile.am | 4 +- include/nbdkit-filter.h | 147 +++++++ include/nbdkit-plugin.h | 33 +- nbdkit.in | 17 +- plugins/file/file.c | 76 +--- plugins/file/nbdkit-file-plugin.pod | 14 +- plugins/nbd/nbd.c | 39 +- src/Makefile.am | 7 +- src/connections.c | 124 +++--- src/filters.c | 613 ++++++++++++++++++++++++++++ src/internal.h | 92 +++-- src/locks.c | 115 ++++++ src/main.c | 134 ++++++- src/nbdkit.pc.in | 1 + src/plugins.c | 680 +++++++++++++++++--------------- src/protocol.h | 10 +- tests/test-parallel-file.sh | 4 +- tests/test-parallel-nbd.sh | 1 + 32 files changed, 2896 insertions(+), 560 deletions(-) create mode 100644 
docs/nbdkit-filter.pod create mode 100644 filters/Makefile.am create mode 100644 filters/delay/Makefile.am create mode 100644 filters/delay/delay.c create mode 100644 filters/delay/nbdkit-delay-filter.pod create mode 100644 filters/offset/Makefile.am create mode 100644 filters/offset/nbdkit-offset-filter.pod create mode 100644 filters/offset/offset.c create mode 100644 include/nbdkit-filter.h create mode 100644 src/filters.c create mode 100644 src/locks.c -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 01/13] protocol: Split flags from cmd field in requests
Since NBD is a big-endian protocol, the upstream spec was able to repurpose a 32-bit field with flags starting at (1<<16) OR'd into the command into two 16-bit fields (flags first, starting at 1<<0, then the command) for ease of documentation. Matching that split in our code base will also make it easier to implement smarter FUA flag support. This addresses one of the TODO items in the nbd plugin. Signed-off-by: Eric Blake <eblake@redhat.com> --- plugins/nbd/nbd.c | 39 ++++++++++++++++++++------------------- src/connections.c | 12 ++++++------ src/protocol.h | 10 +++++----- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/plugins/nbd/nbd.c b/plugins/nbd/nbd.c index 04147f1..c9727f7 100644 --- a/plugins/nbd/nbd.c +++ b/plugins/nbd/nbd.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2017 Red Hat Inc. + * Copyright (C) 2017-2018 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -249,13 +249,14 @@ find_trans_by_cookie (struct handle *h, uint64_t cookie) /* Send a request, return 0 on success or -1 on write failure. */ static int -nbd_request_raw (struct handle *h, uint32_t type, uint64_t offset, - uint32_t count, uint64_t cookie, const void *buf) +nbd_request_raw (struct handle *h, uint16_t flags, uint16_t type, + uint64_t offset, uint32_t count, uint64_t cookie, + const void *buf) { struct request req = { .magic = htobe32 (NBD_REQUEST_MAGIC), - /* TODO nbdkit should have a way to pass flags, separate from cmd type */ - .type = htobe32 (type), + .flags = htobe16 (flags), + .type = htobe16 (type), .handle = cookie, /* Opaque to server, so endianness doesn't matter */ .offset = htobe64 (offset), .count = htobe32 (count), @@ -275,8 +276,9 @@ nbd_request_raw (struct handle *h, uint32_t type, uint64_t offset, /* Perform the request half of a transaction. On success, return the non-negative fd for reading the reply; on error return -1. 
*/ static int -nbd_request_full (struct handle *h, uint32_t type, uint64_t offset, - uint32_t count, const void *req_buf, void *rep_buf) +nbd_request_full (struct handle *h, uint16_t flags, uint16_t type, + uint64_t offset, uint32_t count, const void *req_buf, + void *rep_buf) { int err; struct transaction *trans; @@ -307,7 +309,7 @@ nbd_request_full (struct handle *h, uint32_t type, uint64_t offset, fd = trans->u.fds[0]; cookie = trans->u.cookie; nbd_unlock (h); - if (nbd_request_raw (h, type, offset, count, cookie, req_buf) == 0) + if (nbd_request_raw (h, flags, type, offset, count, cookie, req_buf) == 0) return fd; trans = find_trans_by_cookie (h, cookie); @@ -326,9 +328,10 @@ nbd_request_full (struct handle *h, uint32_t type, uint64_t offset, /* Shorthand for nbd_request_full when no extra buffers are involved. */ static int -nbd_request (struct handle *h, uint32_t type, uint64_t offset, uint32_t count) +nbd_request (struct handle *h, uint16_t flags, uint16_t type, uint64_t offset, + uint32_t count) { - return nbd_request_full (h, type, offset, count, NULL, NULL); + return nbd_request_full (h, flags, type, offset, count, NULL, NULL); } /* Read a reply, and look up the fd corresponding to the transaction. @@ -563,7 +566,7 @@ nbd_close (void *handle) struct handle *h = handle; if (!h->dead) - nbd_request_raw (h, NBD_CMD_DISC, 0, 0, 0, NULL); + nbd_request_raw (h, 0, NBD_CMD_DISC, 0, 0, 0, NULL); close (h->fd); if ((errno = pthread_join (h->reader, NULL))) nbdkit_debug ("failed to join reader thread: %m"); @@ -622,7 +625,7 @@ nbd_pread (void *handle, void *buf, uint32_t count, uint64_t offset) /* TODO Auto-fragment this if the client has a larger max transfer limit than the server */ - c = nbd_request_full (h, NBD_CMD_READ, offset, count, NULL, buf); + c = nbd_request_full (h, 0, NBD_CMD_READ, offset, count, NULL, buf); return c < 0 ? 
c : nbd_reply (h, c); } @@ -635,7 +638,7 @@ nbd_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) /* TODO Auto-fragment this if the client has a larger max transfer limit than the server */ - c = nbd_request_full (h, NBD_CMD_WRITE, offset, count, buf, NULL); + c = nbd_request_full (h, 0, NBD_CMD_WRITE, offset, count, buf, NULL); return c < 0 ? c : nbd_reply (h, c); } @@ -644,7 +647,6 @@ static int nbd_zero (void *handle, uint32_t count, uint64_t offset, int may_trim) { struct handle *h = handle; - uint32_t cmd = NBD_CMD_WRITE_ZEROES; int c; if (!(h->flags & NBD_FLAG_SEND_WRITE_ZEROES)) { @@ -653,9 +655,8 @@ nbd_zero (void *handle, uint32_t count, uint64_t offset, int may_trim) return -1; } - if (!may_trim) - cmd |= NBD_CMD_FLAG_NO_HOLE; - c = nbd_request (h, cmd, offset, count); + c = nbd_request (h, may_trim ? 0 : NBD_CMD_FLAG_NO_HOLE, + NBD_CMD_WRITE_ZEROES, offset, count); return c < 0 ? c : nbd_reply (h, c); } @@ -666,7 +667,7 @@ nbd_trim (void *handle, uint32_t count, uint64_t offset) struct handle *h = handle; int c; - c = nbd_request (h, NBD_CMD_TRIM, offset, count); + c = nbd_request (h, 0, NBD_CMD_TRIM, offset, count); return c < 0 ? c : nbd_reply (h, c); } @@ -677,7 +678,7 @@ nbd_flush (void *handle) struct handle *h = handle; int c; - c = nbd_request (h, NBD_CMD_FLUSH, 0, 0); + c = nbd_request (h, 0, NBD_CMD_FLUSH, 0, 0); return c < 0 ? c : nbd_reply (h, c); } diff --git a/src/connections.c b/src/connections.c index 111a810..e700ee0 100644 --- a/src/connections.c +++ b/src/connections.c @@ -760,7 +760,7 @@ valid_range (struct connection *conn, uint64_t offset, uint32_t count) static bool validate_request (struct connection *conn, - uint32_t cmd, uint32_t flags, uint64_t offset, uint32_t count, + uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count, uint32_t *error) { /* Readonly connection? 
*/ @@ -865,7 +865,7 @@ get_error (struct connection *conn) */ static uint32_t handle_request (struct connection *conn, - uint32_t cmd, uint32_t flags, uint64_t offset, uint32_t count, + uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count, void *buf) { bool flush_after_command; @@ -979,7 +979,8 @@ recv_request_send_reply (struct connection *conn) int r; struct request request; struct reply reply; - uint32_t magic, cmd, flags, count, error = 0; + uint16_t cmd, flags; + uint32_t magic, count, error = 0; uint64_t offset; CLEANUP_FREE char *buf = NULL; @@ -1005,9 +1006,8 @@ recv_request_send_reply (struct connection *conn) return set_status (conn, -1); } - cmd = be32toh (request.type); - flags = cmd & ~NBD_CMD_MASK_COMMAND; - cmd &= NBD_CMD_MASK_COMMAND; + flags = be16toh (request.flags); + cmd = be16toh (request.type); offset = be64toh (request.offset); count = be32toh (request.count); diff --git a/src/protocol.h b/src/protocol.h index 9d9dad9..aa458a0 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013 Red Hat Inc. + * Copyright (C) 2013-2018 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -112,7 +112,8 @@ struct new_handshake_finish { /* Request (client -> server). */ struct request { uint32_t magic; /* NBD_REQUEST_MAGIC. */ - uint32_t type; /* Request type. */ + uint16_t flags; /* Request flags. */ + uint16_t type; /* Request type. */ uint64_t handle; /* Opaque handle. */ uint64_t offset; /* Request offset. */ uint32_t count; /* Request length. */ @@ -134,9 +135,8 @@ struct reply { #define NBD_CMD_FLUSH 3 #define NBD_CMD_TRIM 4 #define NBD_CMD_WRITE_ZEROES 6 -#define NBD_CMD_MASK_COMMAND 0xffff -#define NBD_CMD_FLAG_FUA (1<<16) -#define NBD_CMD_FLAG_NO_HOLE (2<<16) +#define NBD_CMD_FLAG_FUA (1<<0) +#define NBD_CMD_FLAG_NO_HOLE (1<<1) /* Error codes (previously errno). 
* See http://git.qemu.org/?p=qemu.git;a=commitdiff;h=ca4414804114fd0095b317785bc0b51862e62ebb -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 02/13] plugins: Move locking to a new file.
From: "Richard W.M. Jones" <rjones@redhat.com> Mostly code motion. Message-Id: <20180117205356.8699-2-rjones@redhat.com> --- src/Makefile.am | 1 + src/connections.c | 14 +++---- src/internal.h | 14 ++++--- src/locks.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/plugins.c | 77 +++++------------------------------- 5 files changed, 142 insertions(+), 79 deletions(-) create mode 100644 src/locks.c diff --git a/src/Makefile.am b/src/Makefile.am index 12b9043..1f05eab 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,6 +40,7 @@ nbdkit_SOURCES = \ crypto.c \ errors.c \ internal.h \ + locks.c \ main.c \ plugins.c \ protocol.h \ diff --git a/src/connections.c b/src/connections.c index e700ee0..b7c815e 100644 --- a/src/connections.c +++ b/src/connections.c @@ -211,7 +211,7 @@ _handle_single_connection (int sockin, int sockout) int nworkers = threads ? threads : DEFAULT_PARALLEL_REQUESTS; pthread_t *workers = NULL; - if (!plugin_is_parallel() || nworkers == 1) + if (plugin_thread_model () < NBDKIT_THREAD_MODEL_PARALLEL || nworkers == 1) nworkers = 0; conn = new_connection (sockin, sockout, nworkers); if (!conn) @@ -287,9 +287,9 @@ handle_single_connection (int sockin, int sockout) { int r; - plugin_lock_connection (); + lock_connection (); r = _handle_single_connection (sockin, sockout); - plugin_unlock_connection (); + unlock_connection (); return r; } @@ -740,12 +740,12 @@ negotiate_handshake (struct connection *conn) { int r; - plugin_lock_request (conn); + lock_request (conn); if (!newstyle) r = _negotiate_handshake_oldstyle (conn); else r = _negotiate_handshake_newstyle (conn); - plugin_unlock_request (conn); + unlock_request (conn); return r; } @@ -1057,9 +1057,9 @@ recv_request_send_reply (struct connection *conn) error = ESHUTDOWN; } else { - plugin_lock_request (conn); + lock_request (conn); error = handle_request (conn, cmd, flags, offset, count, buf); - plugin_unlock_request (conn); + unlock_request (conn); } /* Send the reply packet. 
*/ diff --git a/src/internal.h b/src/internal.h index 73bc09e..068204b 100644 --- a/src/internal.h +++ b/src/internal.h @@ -144,17 +144,13 @@ extern int crypto_negotiate_tls (struct connection *conn, int sockin, int sockou /* plugins.c */ extern void plugin_register (const char *_filename, void *_dl, struct nbdkit_plugin *(*plugin_init) (void)); extern void plugin_cleanup (void); +extern int plugin_thread_model (void); extern const char *plugin_name (void); extern void plugin_usage (void); extern const char *plugin_version (void); extern void plugin_dump_fields (void); extern void plugin_config (const char *key, const char *value); extern void plugin_config_complete (void); -extern void plugin_lock_connection (void); -extern void plugin_unlock_connection (void); -extern void plugin_lock_request (struct connection *conn); -extern void plugin_unlock_request (struct connection *conn); -extern bool plugin_is_parallel (void); extern int plugin_errno_is_preserved (void); extern int plugin_open (struct connection *conn, int readonly); extern void plugin_close (struct connection *conn); @@ -169,6 +165,14 @@ extern int plugin_flush (struct connection *conn); extern int plugin_trim (struct connection *conn, uint32_t count, uint64_t offset); extern int plugin_zero (struct connection *conn, uint32_t count, uint64_t offset, int may_trim); +/* locks.c */ +extern void lock_connection (void); +extern void unlock_connection (void); +extern void lock_request (struct connection *conn); +extern void unlock_request (struct connection *conn); +extern void lock_unload (void); +extern void unlock_unload (void); + /* sockets.c */ extern int *bind_unix_socket (size_t *); extern int *bind_tcpip_socket (size_t *); diff --git a/src/locks.c b/src/locks.c new file mode 100644 index 0000000..6021356 --- /dev/null +++ b/src/locks.c @@ -0,0 +1,115 @@ +/* nbdkit + * Copyright (C) 2013-2018 Red Hat Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> + +#include "internal.h" + +static pthread_mutex_t connection_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t all_requests_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_rwlock_t unload_prevention_lock = PTHREAD_RWLOCK_INITIALIZER; + +void +lock_connection (void) +{ + int thread_model = plugin_thread_model (); + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { + debug ("acquire connection lock"); + pthread_mutex_lock (&connection_lock); + } +} + +void +unlock_connection (void) +{ + int thread_model = plugin_thread_model (); + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { + debug ("release connection lock"); + pthread_mutex_unlock (&connection_lock); + } +} + +void +lock_request (struct connection *conn) +{ + int thread_model = plugin_thread_model (); + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS) { + debug ("acquire global request lock"); + pthread_mutex_lock (&all_requests_lock); + } + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS) { + debug ("acquire per-connection request lock"); + pthread_mutex_lock (connection_get_request_lock (conn)); + } + + debug ("acquire unload prevention lock"); + pthread_rwlock_rdlock (&unload_prevention_lock); +} + +void +unlock_request (struct connection *conn) +{ + int thread_model = plugin_thread_model (); + + debug ("release unload prevention lock"); + pthread_rwlock_unlock (&unload_prevention_lock); + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS) { + debug ("release per-connection request lock"); + pthread_mutex_unlock (connection_get_request_lock (conn)); + } + + if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS) { + debug ("release global request lock"); + pthread_mutex_unlock (&all_requests_lock); + } +} + +void +lock_unload (void) +{ + pthread_rwlock_wrlock (&unload_prevention_lock); +} + +void +unlock_unload (void) +{ + pthread_rwlock_unlock 
(&unload_prevention_lock); +} diff --git a/src/plugins.c b/src/plugins.c index 9b5d2d5..b7ab43d 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -46,10 +46,6 @@ #include "nbdkit-plugin.h" #include "internal.h" -static pthread_mutex_t connection_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t all_requests_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_rwlock_t unload_prevention_lock = PTHREAD_RWLOCK_INITIALIZER; - /* Maximum read or write request that we will handle. */ #define MAX_REQUEST_SIZE (64 * 1024 * 1024) @@ -165,7 +161,7 @@ plugin_cleanup (void) /* Acquiring this lock prevents any plugin callbacks from running * simultaneously. */ - pthread_rwlock_wrlock (&unload_prevention_lock); + lock_unload (); debug ("%s: unload", filename); if (plugin.unload) @@ -176,10 +172,18 @@ plugin_cleanup (void) free (filename); filename = NULL; - pthread_rwlock_unlock (&unload_prevention_lock); + unlock_unload (); } } +int +plugin_thread_model (void) +{ + assert (dl); + + return plugin._thread_model; +} + const char * plugin_name (void) { @@ -312,67 +316,6 @@ plugin_config_complete (void) exit (EXIT_FAILURE); } -/* Handle the thread model. 
*/ -void -plugin_lock_connection (void) -{ - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { - debug ("%s: acquire connection lock", filename); - pthread_mutex_lock (&connection_lock); - } -} - -void -plugin_unlock_connection (void) -{ - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { - debug ("%s: release connection lock", filename); - pthread_mutex_unlock (&connection_lock); - } -} - -void -plugin_lock_request (struct connection *conn) -{ - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS) { - debug ("acquire global request lock"); - pthread_mutex_lock (&all_requests_lock); - } - - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS) { - debug ("acquire per-connection request lock"); - pthread_mutex_lock (connection_get_request_lock (conn)); - } - - debug ("acquire unload prevention lock"); - pthread_rwlock_rdlock (&unload_prevention_lock); -} - -void -plugin_unlock_request (struct connection *conn) -{ - debug ("release unload prevention lock"); - pthread_rwlock_unlock (&unload_prevention_lock); - - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS) { - debug ("release per-connection request lock"); - pthread_mutex_unlock (connection_get_request_lock (conn)); - } - - if (plugin._thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS) { - debug ("release global request lock"); - pthread_mutex_unlock (&all_requests_lock); - } -} - -bool -plugin_is_parallel (void) -{ - assert (dl); - - return plugin._thread_model >= NBDKIT_THREAD_MODEL_PARALLEL; -} - int plugin_errno_is_preserved (void) { -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 03/13] Refactor plugin_* functions into a backend struct.
From: "Richard W.M. Jones" <rjones@redhat.com> Introduce the concept of a backend. Currently the only type of backend is a plugin, and there can only be one of them. Instead of calling functions like ‘plugin_pwrite’ you call the backend method ‘backend->pwrite (backend, ...)’. The change is largely mechanical. I was able to remove ‘assert (dl)’ statements throughout since we can now prove they will never be called. Note this does not lift the restriction of one plugin per server, and it can *never* do that because plugins can use global variables. Message-Id: <20180117205356.8699-3-rjones@redhat.com> --- src/connections.c | 40 +++-- src/internal.h | 55 +++--- src/locks.c | 8 +- src/main.c | 31 ++-- src/plugins.c | 527 ++++++++++++++++++++++++++++++------------------------ 5 files changed, 367 insertions(+), 294 deletions(-) diff --git a/src/connections.c b/src/connections.c index b7c815e..aac1d05 100644 --- a/src/connections.c +++ b/src/connections.c @@ -211,16 +211,17 @@ _handle_single_connection (int sockin, int sockout) int nworkers = threads ? threads : DEFAULT_PARALLEL_REQUESTS; pthread_t *workers = NULL; - if (plugin_thread_model () < NBDKIT_THREAD_MODEL_PARALLEL || nworkers == 1) + if (backend->thread_model (backend) < NBDKIT_THREAD_MODEL_PARALLEL || + nworkers == 1) nworkers = 0; conn = new_connection (sockin, sockout, nworkers); if (!conn) goto done; - if (plugin_open (conn, readonly) == -1) + if (backend->open (backend, conn, readonly) == -1) goto done; - threadlocal_set_name (plugin_name ()); + threadlocal_set_name (backend->name (backend)); /* Handshake. 
*/ if (negotiate_handshake (conn) == -1) @@ -251,7 +252,8 @@ _handle_single_connection (int sockin, int sockout) set_status (conn, -1); goto wait; } - if (asprintf (&worker->name, "%s.%d", plugin_name (), nworkers) < 0) { + if (asprintf (&worker->name, + "%s.%d", backend->name (backend), nworkers) < 0) { perror ("asprintf"); set_status (conn, -1); free (worker); @@ -340,7 +342,7 @@ free_connection (struct connection *conn) */ if (!quit) { if (conn->handle) - plugin_close (conn); + backend->close (backend, conn); } free (conn); @@ -352,7 +354,7 @@ compute_eflags (struct connection *conn, uint16_t *flags) uint16_t eflags = NBD_FLAG_HAS_FLAGS; int fl; - fl = plugin_can_write (conn); + fl = backend->can_write (backend, conn); if (fl == -1) return -1; if (readonly || !fl) { @@ -363,7 +365,7 @@ compute_eflags (struct connection *conn, uint16_t *flags) eflags |= NBD_FLAG_SEND_WRITE_ZEROES; } - fl = plugin_can_flush (conn); + fl = backend->can_flush (backend, conn); if (fl == -1) return -1; if (fl) { @@ -371,7 +373,7 @@ compute_eflags (struct connection *conn, uint16_t *flags) conn->can_flush = 1; } - fl = plugin_is_rotational (conn); + fl = backend->is_rotational (backend, conn); if (fl == -1) return -1; if (fl) { @@ -379,7 +381,7 @@ compute_eflags (struct connection *conn, uint16_t *flags) conn->is_rotational = 1; } - fl = plugin_can_trim (conn); + fl = backend->can_trim (backend, conn); if (fl == -1) return -1; if (fl) { @@ -407,7 +409,7 @@ _negotiate_handshake_oldstyle (struct connection *conn) return -1; } - r = plugin_get_size (conn); + r = backend->get_size (backend, conn); if (r == -1) return -1; if (r < 0) { @@ -703,7 +705,7 @@ _negotiate_handshake_newstyle (struct connection *conn) return -1; /* Finish the newstyle handshake. 
*/ - r = plugin_get_size (conn); + r = backend->get_size (backend, conn); if (r == -1) return -1; if (r < 0) { @@ -848,7 +850,7 @@ get_error (struct connection *conn) { int ret = threadlocal_get_error (); - if (!ret && plugin_errno_is_preserved ()) + if (!ret && backend->errno_is_preserved (backend)) ret = errno; return ret ? ret : EIO; } @@ -881,28 +883,28 @@ handle_request (struct connection *conn, switch (cmd) { case NBD_CMD_READ: - if (plugin_pread (conn, buf, count, offset) == -1) + if (backend->pread (backend, conn, buf, count, offset) == -1) return get_error (conn); break; case NBD_CMD_WRITE: - if (plugin_pwrite (conn, buf, count, offset) == -1) + if (backend->pwrite (backend, conn, buf, count, offset) == -1) return get_error (conn); break; case NBD_CMD_FLUSH: - if (plugin_flush (conn) == -1) + if (backend->flush (backend, conn) == -1) return get_error (conn); break; case NBD_CMD_TRIM: - if (plugin_trim (conn, count, offset) == -1) + if (backend->trim (backend, conn, count, offset) == -1) return get_error (conn); break; case NBD_CMD_WRITE_ZEROES: - if (plugin_zero (conn, count, offset, - !(flags & NBD_CMD_FLAG_NO_HOLE)) == -1) + if (backend->zero (backend, conn, count, offset, + !(flags & NBD_CMD_FLAG_NO_HOLE)) == -1) return get_error (conn); break; @@ -910,7 +912,7 @@ handle_request (struct connection *conn, abort (); } - if (flush_after_command && plugin_flush (conn) == -1) + if (flush_after_command && backend->flush (backend, conn) == -1) return get_error (conn); return 0; diff --git a/src/internal.h b/src/internal.h index 068204b..9c4993d 100644 --- a/src/internal.h +++ b/src/internal.h @@ -35,6 +35,7 @@ #define NBDKIT_INTERNAL_H #include <stdbool.h> +#include <stddef.h> #include <stdarg.h> #include <sys/socket.h> #include <pthread.h> @@ -91,6 +92,11 @@ # endif #endif +#define container_of(ptr, type, member) ({ \ + const typeof (((type *) 0)->member) *__mptr = (ptr); \ + (type *) ((char *) __mptr - offsetof(type, member)); \ + }) + /* main.c */ extern 
const char *exportname; extern const char *ipaddr; @@ -108,6 +114,8 @@ extern int threads; extern volatile int quit; extern int quit_fd; +extern struct backend *backend; + /* cleanup.c */ extern void cleanup_free (void *ptr); #define CLEANUP_FREE __attribute__((cleanup (cleanup_free))) @@ -142,28 +150,31 @@ extern int crypto_negotiate_tls (struct connection *conn, int sockin, int sockou #define debug nbdkit_debug /* plugins.c */ -extern void plugin_register (const char *_filename, void *_dl, struct nbdkit_plugin *(*plugin_init) (void)); -extern void plugin_cleanup (void); -extern int plugin_thread_model (void); -extern const char *plugin_name (void); -extern void plugin_usage (void); -extern const char *plugin_version (void); -extern void plugin_dump_fields (void); -extern void plugin_config (const char *key, const char *value); -extern void plugin_config_complete (void); -extern int plugin_errno_is_preserved (void); -extern int plugin_open (struct connection *conn, int readonly); -extern void plugin_close (struct connection *conn); -extern int64_t plugin_get_size (struct connection *conn); -extern int plugin_can_write (struct connection *conn); -extern int plugin_can_flush (struct connection *conn); -extern int plugin_is_rotational (struct connection *conn); -extern int plugin_can_trim (struct connection *conn); -extern int plugin_pread (struct connection *conn, void *buf, uint32_t count, uint64_t offset); -extern int plugin_pwrite (struct connection *conn, void *buf, uint32_t count, uint64_t offset); -extern int plugin_flush (struct connection *conn); -extern int plugin_trim (struct connection *conn, uint32_t count, uint64_t offset); -extern int plugin_zero (struct connection *conn, uint32_t count, uint64_t offset, int may_trim); +struct backend { + void (*free) (struct backend *); + int (*thread_model) (struct backend *); + const char *(*name) (struct backend *); + void (*usage) (struct backend *); + const char *(*version) (struct backend *); + void 
(*dump_fields) (struct backend *); + void (*config) (struct backend *, const char *key, const char *value); + void (*config_complete) (struct backend *); + int (*errno_is_preserved) (struct backend *); + int (*open) (struct backend *, struct connection *conn, int readonly); + void (*close) (struct backend *, struct connection *conn); + int64_t (*get_size) (struct backend *, struct connection *conn); + int (*can_write) (struct backend *, struct connection *conn); + int (*can_flush) (struct backend *, struct connection *conn); + int (*is_rotational) (struct backend *, struct connection *conn); + int (*can_trim) (struct backend *, struct connection *conn); + int (*pread) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset); + int (*pwrite) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset); + int (*flush) (struct backend *, struct connection *conn); + int (*trim) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset); + int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, int may_trim); +}; + +extern struct backend *plugin_register (const char *_filename, void *_dl, struct nbdkit_plugin *(*plugin_init) (void)); /* locks.c */ extern void lock_connection (void); diff --git a/src/locks.c b/src/locks.c index 6021356..62b2dd0 100644 --- a/src/locks.c +++ b/src/locks.c @@ -45,7 +45,7 @@ static pthread_rwlock_t unload_prevention_lock = PTHREAD_RWLOCK_INITIALIZER; void lock_connection (void) { - int thread_model = plugin_thread_model (); + int thread_model = backend->thread_model (backend); if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { debug ("acquire connection lock"); @@ -56,7 +56,7 @@ lock_connection (void) void unlock_connection (void) { - int thread_model = plugin_thread_model (); + int thread_model = backend->thread_model (backend); if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS) { debug ("release connection 
lock"); @@ -67,7 +67,7 @@ unlock_connection (void) void lock_request (struct connection *conn) { - int thread_model = plugin_thread_model (); + int thread_model = backend->thread_model (backend); if (thread_model <= NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS) { debug ("acquire global request lock"); @@ -86,7 +86,7 @@ lock_request (struct connection *conn) void unlock_request (struct connection *conn) { - int thread_model = plugin_thread_model (); + int thread_model = backend->thread_model (backend); debug ("release unload prevention lock"); pthread_rwlock_unlock (&unload_prevention_lock); diff --git a/src/main.c b/src/main.c index 4eca859..b3e6bad 100644 --- a/src/main.c +++ b/src/main.c @@ -64,7 +64,7 @@ static int is_short_name (const char *); static char *make_random_fifo (void); -static void open_plugin_so (const char *filename, int short_name); +static struct backend *open_plugin_so (const char *filename, int short_name); static void start_serving (void); static void set_up_signals (void); static void run_command (void); @@ -103,6 +103,9 @@ volatile int quit; int quit_fd; static int write_quit_fd; +/* The currently loaded plugin. */ +struct backend *backend; + static char *random_fifo_dir = NULL; static char *random_fifo = NULL; @@ -493,12 +496,12 @@ main (int argc, char *argv[]) } } - open_plugin_so (filename, short_name); + backend = open_plugin_so (filename, short_name); if (help) { usage (); printf ("\n%s:\n\n", filename); - plugin_usage (); + backend->usage (backend); exit (EXIT_SUCCESS); } @@ -506,8 +509,8 @@ main (int argc, char *argv[]) const char *v; display_version (); - printf ("%s", plugin_name ()); - if ((v = plugin_version ()) != NULL) + printf ("%s", backend->name (backend)); + if ((v = backend->version (backend)) != NULL) printf (" %s", v); printf ("\n"); exit (EXIT_SUCCESS); @@ -518,7 +521,7 @@ main (int argc, char *argv[]) * we assume it is 'script=...'. 
*/ if (optind < argc && (p = strchr (argv[optind], '=')) == NULL) { - plugin_config ("script", argv[optind]); + backend->config (backend, "script", argv[optind]); ++optind; } @@ -528,14 +531,14 @@ main (int argc, char *argv[]) * script=... parameter (and do not wait for config_complete). */ if (dump_plugin) { - plugin_dump_fields (); + backend->dump_fields (backend); exit (EXIT_SUCCESS); } while (optind < argc) { if ((p = strchr (argv[optind], '=')) != NULL) { *p = '\0'; - plugin_config (argv[optind], p+1); + backend->config (backend, argv[optind], p+1); ++optind; } else { @@ -546,11 +549,12 @@ main (int argc, char *argv[]) } } - plugin_config_complete (); + backend->config_complete (backend); start_serving (); - plugin_cleanup (); + backend->free (backend); + backend = NULL; free (unixsocket); free (pidfile); @@ -609,9 +613,10 @@ make_random_fifo (void) return unixsocket; } -static void +static struct backend * open_plugin_so (const char *name, int short_name) { + struct backend *ret; char *filename = (char *) name; int free_filename = 0; void *dl; @@ -647,10 +652,12 @@ open_plugin_so (const char *name, int short_name) } /* Register the plugin. */ - plugin_register (filename, dl, plugin_init); + ret = plugin_register (filename, dl, plugin_init); if (free_filename) free (filename); + + return ret; } static void diff --git a/src/plugins.c b/src/plugins.c index b7ab43d..b687849 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -49,192 +49,100 @@ /* Maximum read or write request that we will handle. */ #define MAX_REQUEST_SIZE (64 * 1024 * 1024) -/* Currently the server can only load one plugin (see TODO). Hence we - * can just use globals to store these. +/* We extend the generic backend struct with extra fields relating + * to this plugin. 
*/ -static char *filename; -static void *dl; -static struct nbdkit_plugin plugin; - -void -plugin_register (const char *_filename, - void *_dl, struct nbdkit_plugin *(*plugin_init) (void)) +struct backend_plugin { + struct backend backend; + char *filename; + void *dl; + struct nbdkit_plugin plugin; +}; + +static void +plugin_free (struct backend *b) { - const struct nbdkit_plugin *_plugin; - size_t i, len, size; - - filename = strdup (_filename); - if (filename == NULL) { - perror ("strdup"); - exit (EXIT_FAILURE); - } - dl = _dl; - - debug ("registering %s", filename); - - /* Call the initialization function which returns the address of the - * plugin's own 'struct nbdkit_plugin'. - */ - _plugin = plugin_init (); - if (!_plugin) { - fprintf (stderr, "%s: %s: plugin registration function failed\n", - program_name, filename); - exit (EXIT_FAILURE); - } - - /* Check for incompatible future versions. */ - if (_plugin->_api_version != 1) { - fprintf (stderr, "%s: %s: plugin is incompatible with this version of nbdkit (_api_version = %d)\n", - program_name, filename, _plugin->_api_version); - exit (EXIT_FAILURE); - } + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - /* Since the plugin might be much older than the current version of - * nbdkit, only copy up to the self-declared _struct_size of the - * plugin and zero out the rest. If the plugin is much newer then - * we'll only call the "old" fields. + /* Acquiring this lock prevents any plugin callbacks from running + * simultaneously. */ - size = sizeof plugin; /* our struct */ - memset (&plugin, 0, size); - if (size > _plugin->_struct_size) - size = _plugin->_struct_size; - memcpy (&plugin, _plugin, size); + lock_unload (); - /* Check for the minimum fields which must exist in the - * plugin struct. 
- */ - if (plugin.name == NULL) { - fprintf (stderr, "%s: %s: plugin must have a .name field\n", - program_name, filename); - exit (EXIT_FAILURE); - } - if (plugin.open == NULL) { - fprintf (stderr, "%s: %s: plugin must have a .open callback\n", - program_name, filename); - exit (EXIT_FAILURE); - } - if (plugin.get_size == NULL) { - fprintf (stderr, "%s: %s: plugin must have a .get_size callback\n", - program_name, filename); - exit (EXIT_FAILURE); - } - if (plugin.pread == NULL) { - fprintf (stderr, "%s: %s: plugin must have a .pread callback\n", - program_name, filename); - exit (EXIT_FAILURE); - } + debug ("%s: unload", p->filename); + if (p->plugin.unload) + p->plugin.unload (); - len = strlen (plugin.name); - if (len == 0) { - fprintf (stderr, "%s: %s: plugin.name field must not be empty\n", - program_name, filename); - exit (EXIT_FAILURE); - } - for (i = 0; i < len; ++i) { - if (!((plugin.name[i] >= '0' && plugin.name[i] <= '9') || - (plugin.name[i] >= 'a' && plugin.name[i] <= 'z') || - (plugin.name[i] >= 'A' && plugin.name[i] <= 'Z'))) { - fprintf (stderr, "%s: %s: plugin.name ('%s') field must contain only ASCII alphanumeric characters\n", - program_name, filename, plugin.name); - exit (EXIT_FAILURE); - } - } - /* Copy the module's name into local storage, so that plugin.name - * survives past unload. */ - if (!(plugin.name = strdup (plugin.name))) { - perror ("strdup"); - exit (EXIT_FAILURE); - } + dlclose (p->dl); + free (p->filename); - debug ("registered %s (name %s)", filename, plugin.name); + unlock_unload (); - /* Call the on-load callback if it exists. */ - debug ("%s: load", filename); - if (plugin.load) - plugin.load (); + free (p); } -void -plugin_cleanup (void) +static int +plugin_thread_model (struct backend *b) { - if (dl) { - /* Acquiring this lock prevents any plugin callbacks from running - * simultaneously. 
- */ - lock_unload (); - - debug ("%s: unload", filename); - if (plugin.unload) - plugin.unload (); - - dlclose (dl); - dl = NULL; - free (filename); - filename = NULL; + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - unlock_unload (); - } + return p->plugin._thread_model; } -int -plugin_thread_model (void) +static const char * +plugin_name (struct backend *b) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - return plugin._thread_model; -} - -const char * -plugin_name (void) -{ - return plugin.name; + return p->plugin.name; } -void -plugin_usage (void) +static void +plugin_usage (struct backend *b) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - printf ("%s", plugin.name); - if (plugin.longname) - printf (" (%s)", plugin.longname); + printf ("%s", p->plugin.name); + if (p->plugin.longname) + printf (" (%s)", p->plugin.longname); printf ("\n"); - if (plugin.description) { + if (p->plugin.description) { printf ("\n"); - printf ("%s\n", plugin.description); + printf ("%s\n", p->plugin.description); } - if (plugin.config_help) { + if (p->plugin.config_help) { printf ("\n"); - printf ("%s\n", plugin.config_help); + printf ("%s\n", p->plugin.config_help); } } -const char * -plugin_version (void) +static const char * +plugin_version (struct backend *b) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - return plugin.version; + return p->plugin.version; } /* This implements the --dump-plugin option. 
*/ -void -plugin_dump_fields (void) +static void +plugin_dump_fields (struct backend *b) { + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); char *path; - path = nbdkit_absolute_path (filename); + path = nbdkit_absolute_path (p->filename); printf ("path=%s\n", path); free (path); - printf ("name=%s\n", plugin.name); - if (plugin.version) - printf ("version=%s\n", plugin.version); + printf ("name=%s\n", p->plugin.name); + if (p->plugin.version) + printf ("version=%s\n", p->plugin.version); - printf ("api_version=%d\n", plugin._api_version); - printf ("struct_size=%" PRIu64 "\n", plugin._struct_size); + printf ("api_version=%d\n", p->plugin._api_version); + printf ("struct_size=%" PRIu64 "\n", p->plugin._struct_size); printf ("thread_model="); - switch (plugin._thread_model) { + switch (p->plugin._thread_model) { case NBDKIT_THREAD_MODEL_SERIALIZE_CONNECTIONS: printf ("serialize_connections"); break; @@ -248,13 +156,13 @@ plugin_dump_fields (void) printf ("parallel"); break; default: - printf ("%d # unknown thread model!", plugin._thread_model); + printf ("%d # unknown thread model!", p->plugin._thread_model); break; } printf ("\n"); - printf ("errno_is_preserved=%d\n", plugin.errno_is_preserved); + printf ("errno_is_preserved=%d\n", p->plugin.errno_is_preserved); -#define HAS(field) if (plugin.field) printf ("has_%s=1\n", #field) +#define HAS(field) if (p->plugin.field) printf ("has_%s=1\n", #field) HAS (longname); HAS (description); HAS (load); @@ -278,64 +186,64 @@ plugin_dump_fields (void) #undef HAS /* Custom fields. 
*/ - if (plugin.dump_plugin) - plugin.dump_plugin (); + if (p->plugin.dump_plugin) + p->plugin.dump_plugin (); } -void -plugin_config (const char *key, const char *value) +static void +plugin_config (struct backend *b, const char *key, const char *value) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); debug ("%s: config key=%s, value=%s", - filename, key, value); + p->filename, key, value); - if (plugin.config == NULL) { + if (p->plugin.config == NULL) { fprintf (stderr, "%s: %s: this plugin does not need command line configuration\n" "Try using: %s --help %s\n", - program_name, filename, - program_name, filename); + program_name, p->filename, + program_name, p->filename); exit (EXIT_FAILURE); } - if (plugin.config (key, value) == -1) + if (p->plugin.config (key, value) == -1) exit (EXIT_FAILURE); } -void -plugin_config_complete (void) +static void +plugin_config_complete (struct backend *b) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - debug ("%s: config_complete", filename); + debug ("%s: config_complete", p->filename); - if (!plugin.config_complete) + if (!p->plugin.config_complete) return; - if (plugin.config_complete () == -1) + if (p->plugin.config_complete () == -1) exit (EXIT_FAILURE); } -int -plugin_errno_is_preserved (void) +static int +plugin_errno_is_preserved (struct backend *b) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - return plugin.errno_is_preserved; + return p->plugin.errno_is_preserved; } -int -plugin_open (struct connection *conn, int readonly) +static int +plugin_open (struct backend *b, struct connection *conn, int readonly) { + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); void *handle; - assert (dl); assert (connection_get_handle (conn) == NULL); - assert (plugin.open != NULL); + assert (p->plugin.open != NULL); - debug ("%s: open readonly=%d", filename, 
readonly); + debug ("%s: open readonly=%d", p->filename, readonly); - handle = plugin.open (readonly); + handle = p->plugin.open (readonly); if (!handle) return -1; @@ -343,179 +251,192 @@ plugin_open (struct connection *conn, int readonly) return 0; } -void -plugin_close (struct connection *conn) +static void +plugin_close (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("close"); - if (plugin.close) - plugin.close (connection_get_handle (conn)); + if (p->plugin.close) + p->plugin.close (connection_get_handle (conn)); connection_set_handle (conn, NULL); } -int64_t -plugin_get_size (struct connection *conn) +static int64_t +plugin_get_size (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); - assert (plugin.get_size != NULL); + assert (p->plugin.get_size != NULL); debug ("get_size"); - return plugin.get_size (connection_get_handle (conn)); + return p->plugin.get_size (connection_get_handle (conn)); } -int -plugin_can_write (struct connection *conn) +static int +plugin_can_write (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("can_write"); - if (plugin.can_write) - return plugin.can_write (connection_get_handle (conn)); + if (p->plugin.can_write) + return p->plugin.can_write (connection_get_handle (conn)); else - return plugin.pwrite != NULL; + return p->plugin.pwrite != NULL; } -int -plugin_can_flush (struct connection *conn) +static int +plugin_can_flush (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("can_flush"); - if 
(plugin.can_flush) - return plugin.can_flush (connection_get_handle (conn)); + if (p->plugin.can_flush) + return p->plugin.can_flush (connection_get_handle (conn)); else - return plugin.flush != NULL; + return p->plugin.flush != NULL; } -int -plugin_is_rotational (struct connection *conn) +static int +plugin_is_rotational (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("is_rotational"); - if (plugin.is_rotational) - return plugin.is_rotational (connection_get_handle (conn)); + if (p->plugin.is_rotational) + return p->plugin.is_rotational (connection_get_handle (conn)); else return 0; /* assume false */ } -int -plugin_can_trim (struct connection *conn) +static int +plugin_can_trim (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("can_trim"); - if (plugin.can_trim) - return plugin.can_trim (connection_get_handle (conn)); + if (p->plugin.can_trim) + return p->plugin.can_trim (connection_get_handle (conn)); else - return plugin.trim != NULL; + return p->plugin.trim != NULL; } -int -plugin_pread (struct connection *conn, +static int +plugin_pread (struct backend *b, struct connection *conn, void *buf, uint32_t count, uint64_t offset) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); - assert (plugin.pread != NULL); + assert (p->plugin.pread != NULL); debug ("pread count=%" PRIu32 " offset=%" PRIu64, count, offset); - return plugin.pread (connection_get_handle (conn), buf, count, offset); + return p->plugin.pread (connection_get_handle (conn), buf, count, offset); } -int -plugin_pwrite (struct connection *conn, +static int +plugin_pwrite (struct backend *b, struct connection *conn, void *buf, uint32_t count, 
uint64_t offset) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("pwrite count=%" PRIu32 " offset=%" PRIu64, count, offset); - if (plugin.pwrite != NULL) - return plugin.pwrite (connection_get_handle (conn), buf, count, offset); + if (p->plugin.pwrite != NULL) + return p->plugin.pwrite (connection_get_handle (conn), buf, count, offset); else { errno = EROFS; return -1; } } -int -plugin_flush (struct connection *conn) +static int +plugin_flush (struct backend *b, struct connection *conn) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("flush"); - if (plugin.flush != NULL) - return plugin.flush (connection_get_handle (conn)); + if (p->plugin.flush != NULL) + return p->plugin.flush (connection_get_handle (conn)); else { errno = EINVAL; return -1; } } -int -plugin_trim (struct connection *conn, uint32_t count, uint64_t offset) +static int +plugin_trim (struct backend *b, struct connection *conn, + uint32_t count, uint64_t offset) { - assert (dl); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + assert (connection_get_handle (conn)); debug ("trim count=%" PRIu32 " offset=%" PRIu64, count, offset); - if (plugin.trim != NULL) - return plugin.trim (connection_get_handle (conn), count, offset); + if (p->plugin.trim != NULL) + return p->plugin.trim (connection_get_handle (conn), count, offset); else { errno = EINVAL; return -1; } } -int -plugin_zero (struct connection *conn, +static int +plugin_zero (struct backend *b, struct connection *conn, uint32_t count, uint64_t offset, int may_trim) { - assert (dl); - assert (connection_get_handle (conn)); + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); char *buf; uint32_t limit; int result; int err = 0; + assert (connection_get_handle (conn)); + debug ("zero count=%" PRIu32 " 
offset=%" PRIu64 " may_trim=%d", count, offset, may_trim); if (!count) return 0; - if (plugin.zero) { + if (p->plugin.zero) { errno = 0; - result = plugin.zero (connection_get_handle (conn), count, offset, may_trim); + result = p->plugin.zero (connection_get_handle (conn), + count, offset, may_trim); if (result == -1) { err = threadlocal_get_error (); - if (!err && plugin_errno_is_preserved ()) + if (!err && plugin_errno_is_preserved (b)) err = errno; } if (result == 0 || err != EOPNOTSUPP) return result; } - assert (plugin.pwrite); + assert (p->plugin.pwrite); threadlocal_set_error (0); limit = count < MAX_REQUEST_SIZE ? count : MAX_REQUEST_SIZE; buf = calloc (limit, 1); @@ -525,7 +446,8 @@ plugin_zero (struct connection *conn, } while (count) { - result = plugin.pwrite (connection_get_handle (conn), buf, limit, offset); + result = p->plugin.pwrite (connection_get_handle (conn), + buf, limit, offset); if (result < 0) break; count -= limit; @@ -538,3 +460,134 @@ plugin_zero (struct connection *conn, errno = err; return result; } + +static struct backend plugin_functions = { + .free = plugin_free, + .thread_model = plugin_thread_model, + .name = plugin_name, + .usage = plugin_usage, + .version = plugin_version, + .dump_fields = plugin_dump_fields, + .config = plugin_config, + .config_complete = plugin_config_complete, + .errno_is_preserved = plugin_errno_is_preserved, + .open = plugin_open, + .close = plugin_close, + .get_size = plugin_get_size, + .can_write = plugin_can_write, + .can_flush = plugin_can_flush, + .is_rotational = plugin_is_rotational, + .can_trim = plugin_can_trim, + .pread = plugin_pread, + .pwrite = plugin_pwrite, + .flush = plugin_flush, + .trim = plugin_trim, + .zero = plugin_zero, +}; + +/* Register and load a plugin. 
*/ +struct backend * +plugin_register (const char *filename, + void *dl, struct nbdkit_plugin *(*plugin_init) (void)) +{ + struct backend_plugin *p; + const struct nbdkit_plugin *plugin; + size_t i, len, size; + + p = malloc (sizeof *p); + if (p == NULL) { + out_of_memory: + perror ("strdup"); + exit (EXIT_FAILURE); + } + + p->backend = plugin_functions; + p->filename = strdup (filename); + if (p->filename == NULL) goto out_of_memory; + p->dl = dl; + + debug ("registering %s", p->filename); + + /* Call the initialization function which returns the address of the + * plugin's own 'struct nbdkit_plugin'. + */ + plugin = plugin_init (); + if (!plugin) { + fprintf (stderr, "%s: %s: plugin registration function failed\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + + /* Check for incompatible future versions. */ + if (plugin->_api_version != 1) { + fprintf (stderr, "%s: %s: plugin is incompatible with this version of nbdkit (_api_version = %d)\n", + program_name, p->filename, plugin->_api_version); + exit (EXIT_FAILURE); + } + + /* Since the plugin might be much older than the current version of + * nbdkit, only copy up to the self-declared _struct_size of the + * plugin and zero out the rest. If the plugin is much newer then + * we'll only call the "old" fields. + */ + size = sizeof p->plugin; /* our struct */ + memset (&p->plugin, 0, size); + if (size > plugin->_struct_size) + size = plugin->_struct_size; + memcpy (&p->plugin, plugin, size); + + /* Check for the minimum fields which must exist in the + * plugin struct. 
+ */ + if (p->plugin.name == NULL) { + fprintf (stderr, "%s: %s: plugin must have a .name field\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + if (p->plugin.open == NULL) { + fprintf (stderr, "%s: %s: plugin must have a .open callback\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + if (p->plugin.get_size == NULL) { + fprintf (stderr, "%s: %s: plugin must have a .get_size callback\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + if (p->plugin.pread == NULL) { + fprintf (stderr, "%s: %s: plugin must have a .pread callback\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + + len = strlen (p->plugin.name); + if (len == 0) { + fprintf (stderr, "%s: %s: plugin.name field must not be empty\n", + program_name, p->filename); + exit (EXIT_FAILURE); + } + for (i = 0; i < len; ++i) { + if (!((p->plugin.name[i] >= '0' && p->plugin.name[i] <= '9') || + (p->plugin.name[i] >= 'a' && p->plugin.name[i] <= 'z') || + (p->plugin.name[i] >= 'A' && p->plugin.name[i] <= 'Z'))) { + fprintf (stderr, "%s: %s: plugin.name ('%s') field must contain only ASCII alphanumeric characters\n", + program_name, p->filename, p->plugin.name); + exit (EXIT_FAILURE); + } + } + /* Copy the module's name into local storage, so that plugin.name + * survives past unload. */ + if (!(p->plugin.name = strdup (p->plugin.name))) { + perror ("strdup"); + exit (EXIT_FAILURE); + } + + debug ("registered %s (name %s)", p->filename, p->plugin.name); + + /* Call the on-load callback if it exists. */ + debug ("%s: load", p->filename); + if (p->plugin.load) + p->plugin.load (); + + return (struct backend *) p; +} -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 04/13] Fix const-correctness of backend pwrite method.
From: "Richard W.M. Jones" <rjones@redhat.com> Message-Id: <20180117205356.8699-4-rjones@redhat.com> --- src/internal.h | 2 +- src/plugins.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/internal.h b/src/internal.h index 9c4993d..c4ee51b 100644 --- a/src/internal.h +++ b/src/internal.h @@ -168,7 +168,7 @@ struct backend { int (*is_rotational) (struct backend *, struct connection *conn); int (*can_trim) (struct backend *, struct connection *conn); int (*pread) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset); - int (*pwrite) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset); + int (*pwrite) (struct backend *, struct connection *conn, const void *buf, uint32_t count, uint64_t offset); int (*flush) (struct backend *, struct connection *conn); int (*trim) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset); int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, int may_trim); diff --git a/src/plugins.c b/src/plugins.c index b687849..6a2ef66 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -355,7 +355,7 @@ plugin_pread (struct backend *b, struct connection *conn, static int plugin_pwrite (struct backend *b, struct connection *conn, - void *buf, uint32_t count, uint64_t offset) + const void *buf, uint32_t count, uint64_t offset) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 05/13] backend: Pass flags argument through
Although our plugin interface has to support older plugins whose callbacks do not take a flags parameter, we want to allow newer plugins that can honor flags, as well as insist that all filters take a flags argument. This will allow future expansion to conditionally honor any flags passed over the NBD protocol, with immediate plans to support NBD_CMD_FLAG_FUA. So, make the backend interface always pass flags for actions corresponding to NBD wire commands. The .zero interface is currently the only user of a flag; note that our NBDKIT_FLAG_MAY_TRIM preserves our old internal sense, which is intentionally the opposite of the NBD protocol's NBD_CMD_FLAG_NO_HOLE, and that our flag values are not the same as the on-the-wire flag values. Signed-off-by: Eric Blake <eblake@redhat.com> --- src/connections.c | 18 ++++++++++-------- src/internal.h | 14 ++++++++------ src/plugins.c | 18 ++++++++++++------ 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/connections.c b/src/connections.c index aac1d05..8446691 100644 --- a/src/connections.c +++ b/src/connections.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013-2017 Red Hat Inc. + * Copyright (C) 2013-2018 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -871,6 +871,7 @@ handle_request (struct connection *conn, void *buf) { bool flush_after_command; + uint32_t f = 0; /* Flush after command performed? 
*/ flush_after_command = (flags & NBD_CMD_FLAG_FUA) != 0; @@ -883,28 +884,29 @@ handle_request (struct connection *conn, switch (cmd) { case NBD_CMD_READ: - if (backend->pread (backend, conn, buf, count, offset) == -1) + if (backend->pread (backend, conn, buf, count, offset, 0) == -1) return get_error (conn); break; case NBD_CMD_WRITE: - if (backend->pwrite (backend, conn, buf, count, offset) == -1) + if (backend->pwrite (backend, conn, buf, count, offset, 0) == -1) return get_error (conn); break; case NBD_CMD_FLUSH: - if (backend->flush (backend, conn) == -1) + if (backend->flush (backend, conn, 0) == -1) return get_error (conn); break; case NBD_CMD_TRIM: - if (backend->trim (backend, conn, count, offset) == -1) + if (backend->trim (backend, conn, count, offset, 0) == -1) return get_error (conn); break; case NBD_CMD_WRITE_ZEROES: - if (backend->zero (backend, conn, count, offset, - !(flags & NBD_CMD_FLAG_NO_HOLE)) == -1) + if (!(flags & NBD_CMD_FLAG_NO_HOLE)) + f |= NBDKIT_FLAG_MAY_TRIM; + if (backend->zero (backend, conn, count, offset, f) == -1) return get_error (conn); break; @@ -912,7 +914,7 @@ handle_request (struct connection *conn, abort (); } - if (flush_after_command && backend->flush (backend, conn) == -1) + if (flush_after_command && backend->flush (backend, conn, 0) == -1) return get_error (conn); return 0; diff --git a/src/internal.h b/src/internal.h index c4ee51b..be1a0ca 100644 --- a/src/internal.h +++ b/src/internal.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013-2017 Red Hat Inc. + * Copyright (C) 2013-2018 Red Hat Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -97,6 +97,8 @@ (type *) ((char *) __mptr - offsetof(type, member)); \ }) +#define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */ + /* main.c */ extern const char *exportname; extern const char *ipaddr; @@ -167,11 +169,11 @@ struct backend { int (*can_flush) (struct backend *, struct connection *conn); int (*is_rotational) (struct backend *, struct connection *conn); int (*can_trim) (struct backend *, struct connection *conn); - int (*pread) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset); - int (*pwrite) (struct backend *, struct connection *conn, const void *buf, uint32_t count, uint64_t offset); - int (*flush) (struct backend *, struct connection *conn); - int (*trim) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset); - int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, int may_trim); + int (*pread) (struct backend *, struct connection *conn, void *buf, uint32_t count, uint64_t offset, uint32_t flags); + int (*pwrite) (struct backend *, struct connection *conn, const void *buf, uint32_t count, uint64_t offset, uint32_t flags); + int (*flush) (struct backend *, struct connection *conn, uint32_t flags); + int (*trim) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, uint32_t flags); + int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, uint32_t flags); }; extern struct backend *plugin_register (const char *_filename, void *_dl, struct nbdkit_plugin *(*plugin_init) (void)); diff --git a/src/plugins.c b/src/plugins.c index 6a2ef66..4c6c3a5 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013 Red Hat Inc. + * Copyright (C) 2013-2018 Red Hat Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -341,12 +341,13 @@ plugin_can_trim (struct backend *b, struct connection *conn) static int plugin_pread (struct backend *b, struct connection *conn, - void *buf, uint32_t count, uint64_t offset) + void *buf, uint32_t count, uint64_t offset, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); assert (connection_get_handle (conn)); assert (p->plugin.pread != NULL); + assert (!flags); debug ("pread count=%" PRIu32 " offset=%" PRIu64, count, offset); @@ -355,11 +356,12 @@ plugin_pread (struct backend *b, struct connection *conn, static int plugin_pwrite (struct backend *b, struct connection *conn, - const void *buf, uint32_t count, uint64_t offset) + const void *buf, uint32_t count, uint64_t offset, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); assert (connection_get_handle (conn)); + assert (!flags); debug ("pwrite count=%" PRIu32 " offset=%" PRIu64, count, offset); @@ -372,11 +374,12 @@ plugin_pwrite (struct backend *b, struct connection *conn, } static int -plugin_flush (struct backend *b, struct connection *conn) +plugin_flush (struct backend *b, struct connection *conn, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); assert (connection_get_handle (conn)); + assert (!flags); debug ("flush"); @@ -390,11 +393,12 @@ plugin_flush (struct backend *b, struct connection *conn) static int plugin_trim (struct backend *b, struct connection *conn, - uint32_t count, uint64_t offset) + uint32_t count, uint64_t offset, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); assert (connection_get_handle (conn)); + assert (!flags); debug ("trim count=%" PRIu32 " offset=%" PRIu64, count, offset); @@ -408,15 +412,17 @@ plugin_trim (struct backend *b, struct connection *conn, static int plugin_zero (struct backend *b, struct connection 
*conn, - uint32_t count, uint64_t offset, int may_trim) + uint32_t count, uint64_t offset, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); char *buf; uint32_t limit; int result; int err = 0; + int may_trim = (flags & NBDKIT_FLAG_MAY_TRIM) != 0; assert (connection_get_handle (conn)); + assert (!(flags & ~NBDKIT_FLAG_MAY_TRIM)); debug ("zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d", count, offset, may_trim); -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 06/13] plugins: Move FUA fallback to plugins
As long as the plugins couldn't directly handle Forced Unit Access (FUA), it made sense to have a common fallback in handle_request(). But upcoming patches will allow plugins to handle FUA, at which point the fallback is easier to implement on a per-command basis, via the just-added flags parameter in backend functions. Furthermore, the upcoming addition of filters means that we do not want to call .flush at the wrong layer of the backend stack, so it makes sense to have the fallback as low as possible. The NBD spec says that we must tolerate a client sending NBD_CMD_FLAG_FUA on any command (due to historical behavior of at least qemu sending it on READ), but that it only has to have defined semantics on commands that can cause write actions. So, we only pass the FUA flag through on WRITE, WRITE_ZEROES, and TRIM. Note that validate_request() already ensured that we are not calling a write command if conn->readonly; so we no longer have to check that. For now, FUA support is synonymous with .can_flush support, because we are still implementing FUA via the fallback to .flush; but future patches will split the two conditions as part of wiring up further FUA support. It also means that we can assert that if we did not advertise FUA support to the client, then the client should not be requesting FUA; and therefore if the plugins layer sees the FUA flag, we must have a .flush callback. 
Signed-off-by: Eric Blake <eblake@redhat.com> --- src/connections.c | 25 +++++++++++-------- src/internal.h | 1 + src/plugins.c | 75 ++++++++++++++++++++++++++++++++++++------------------- 3 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/connections.c b/src/connections.c index 8446691..55bfe9e 100644 --- a/src/connections.c +++ b/src/connections.c @@ -816,6 +816,11 @@ validate_request (struct connection *conn, *error = EINVAL; return false; } + if (!conn->can_flush && (flags & NBD_CMD_FLAG_FUA)) { + nbdkit_error ("invalid request: FUA flag not supported"); + *error = EINVAL; + return false; + } /* Refuse over-large read and write requests. */ if ((cmd == NBD_CMD_WRITE || cmd == NBD_CMD_READ) && @@ -870,13 +875,8 @@ handle_request (struct connection *conn, uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count, void *buf) { - bool flush_after_command; uint32_t f = 0; - - /* Flush after command performed? */ - flush_after_command = (flags & NBD_CMD_FLAG_FUA) != 0; - if (!conn->can_flush || conn->readonly) - flush_after_command = false; + bool fua = conn->can_flush && (flags & NBD_CMD_FLAG_FUA); /* The plugin should call nbdkit_set_error() to request a particular error, otherwise we fallback to errno or EIO. 
*/ @@ -889,7 +889,9 @@ handle_request (struct connection *conn, break; case NBD_CMD_WRITE: - if (backend->pwrite (backend, conn, buf, count, offset, 0) == -1) + if (fua) + f |= NBDKIT_FLAG_FUA; + if (backend->pwrite (backend, conn, buf, count, offset, f) == -1) return get_error (conn); break; @@ -899,13 +901,17 @@ handle_request (struct connection *conn, break; case NBD_CMD_TRIM: - if (backend->trim (backend, conn, count, offset, 0) == -1) + if (fua) + f |= NBDKIT_FLAG_FUA; + if (backend->trim (backend, conn, count, offset, f) == -1) return get_error (conn); break; case NBD_CMD_WRITE_ZEROES: if (!(flags & NBD_CMD_FLAG_NO_HOLE)) f |= NBDKIT_FLAG_MAY_TRIM; + if (fua) + f |= NBDKIT_FLAG_FUA; if (backend->zero (backend, conn, count, offset, f) == -1) return get_error (conn); break; @@ -914,9 +920,6 @@ handle_request (struct connection *conn, abort (); } - if (flush_after_command && backend->flush (backend, conn, 0) == -1) - return get_error (conn); - return 0; } diff --git a/src/internal.h b/src/internal.h index be1a0ca..c69ec25 100644 --- a/src/internal.h +++ b/src/internal.h @@ -98,6 +98,7 @@ }) #define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */ +#define NBDKIT_FLAG_FUA (1<<1) /* Maps to NBD_CMD_FLAG_FUA */ /* main.c */ extern const char *exportname; diff --git a/src/plugins.c b/src/plugins.c index 4c6c3a5..eff25d3 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -354,25 +354,6 @@ plugin_pread (struct backend *b, struct connection *conn, return p->plugin.pread (connection_get_handle (conn), buf, count, offset); } -static int -plugin_pwrite (struct backend *b, struct connection *conn, - const void *buf, uint32_t count, uint64_t offset, uint32_t flags) -{ - struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - - assert (connection_get_handle (conn)); - assert (!flags); - - debug ("pwrite count=%" PRIu32 " offset=%" PRIu64, count, offset); - - if (p->plugin.pwrite != NULL) - return p->plugin.pwrite (connection_get_handle 
(conn), buf, count, offset); - else { - errno = EROFS; - return -1; - } -} - static int plugin_flush (struct backend *b, struct connection *conn, uint32_t flags) { @@ -391,23 +372,58 @@ plugin_flush (struct backend *b, struct connection *conn, uint32_t flags) } } +static int +plugin_pwrite (struct backend *b, struct connection *conn, + const void *buf, uint32_t count, uint64_t offset, uint32_t flags) +{ + int r; + struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + bool fua = flags & NBDKIT_FLAG_FUA; + + assert (connection_get_handle (conn)); + assert (!(flags & ~NBDKIT_FLAG_FUA)); + + debug ("pwrite count=%" PRIu32 " offset=%" PRIu64 " fua=%d", count, offset, + fua); + + if (p->plugin.pwrite != NULL) + r = p->plugin.pwrite (connection_get_handle (conn), buf, count, offset); + else { + errno = EROFS; + return -1; + } + if (r == 0 && fua) { + assert (p->plugin.flush); + r = plugin_flush (b, conn, 0); + } + return r; +} + static int plugin_trim (struct backend *b, struct connection *conn, uint32_t count, uint64_t offset, uint32_t flags) { + int r; struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + bool fua = flags & NBDKIT_FLAG_FUA; assert (connection_get_handle (conn)); - assert (!flags); + assert (!(flags & ~NBDKIT_FLAG_FUA)); - debug ("trim count=%" PRIu32 " offset=%" PRIu64, count, offset); + debug ("trim count=%" PRIu32 " offset=%" PRIu64 " fua=%d", count, offset, + fua); if (p->plugin.trim != NULL) - return p->plugin.trim (connection_get_handle (conn), count, offset); + r = p->plugin.trim (connection_get_handle (conn), count, offset); else { errno = EINVAL; return -1; } + if (r == 0 && fua) { + assert (p->plugin.flush); + r = plugin_flush (b, conn, 0); + } + return r; } static int @@ -420,12 +436,13 @@ plugin_zero (struct backend *b, struct connection *conn, int result; int err = 0; int may_trim = (flags & NBDKIT_FLAG_MAY_TRIM) != 0; + bool fua = flags & NBDKIT_FLAG_FUA; assert (connection_get_handle 
(conn)); - assert (!(flags & ~NBDKIT_FLAG_MAY_TRIM)); + assert (!(flags & ~(NBDKIT_FLAG_MAY_TRIM | NBDKIT_FLAG_FUA))); - debug ("zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d", - count, offset, may_trim); + debug ("zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d fua=%d", + count, offset, may_trim, fua); if (!count) return 0; @@ -439,7 +456,7 @@ plugin_zero (struct backend *b, struct connection *conn, err = errno; } if (result == 0 || err != EOPNOTSUPP) - return result; + goto done; } assert (p->plugin.pwrite); @@ -464,6 +481,12 @@ plugin_zero (struct backend *b, struct connection *conn, err = errno; free (buf); errno = err; + + done: + if (!result && fua) { + assert (p->plugin.flush); + result = plugin_flush (b, conn, 0); + } return result; } -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 07/13] backend: Add a .plugin_name method.
From: "Richard W.M. Jones" <rjones@redhat.com> This returns the plugin name, which for plugins is the same as the ordinary .name method (but for filters will be different). Message-Id: <20180117205356.8699-5-rjones@redhat.com> --- src/connections.c | 4 ++-- src/internal.h | 1 + src/plugins.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/connections.c b/src/connections.c index 55bfe9e..7a4c7bb 100644 --- a/src/connections.c +++ b/src/connections.c @@ -221,7 +221,7 @@ _handle_single_connection (int sockin, int sockout) if (backend->open (backend, conn, readonly) == -1) goto done; - threadlocal_set_name (backend->name (backend)); + threadlocal_set_name (backend->plugin_name (backend)); /* Handshake. */ if (negotiate_handshake (conn) == -1) @@ -253,7 +253,7 @@ _handle_single_connection (int sockin, int sockout) goto wait; } if (asprintf (&worker->name, - "%s.%d", backend->name (backend), nworkers) < 0) { + "%s.%d", backend->plugin_name (backend), nworkers) < 0) { perror ("asprintf"); set_status (conn, -1); free (worker); diff --git a/src/internal.h b/src/internal.h index c69ec25..9d01c2b 100644 --- a/src/internal.h +++ b/src/internal.h @@ -157,6 +157,7 @@ struct backend { void (*free) (struct backend *); int (*thread_model) (struct backend *); const char *(*name) (struct backend *); + const char *(*plugin_name) (struct backend *); void (*usage) (struct backend *); const char *(*version) (struct backend *); void (*dump_fields) (struct backend *); diff --git a/src/plugins.c b/src/plugins.c index eff25d3..4442a50 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -494,6 +494,7 @@ static struct backend plugin_functions = { .free = plugin_free, .thread_model = plugin_thread_model, .name = plugin_name, + .plugin_name = plugin_name, .usage = plugin_usage, .version = plugin_version, .dump_fields = plugin_dump_fields, -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 08/13] connections: Allow multiple handles to be stored in the connection object.
From: "Richard W.M. Jones" <rjones@redhat.com> Previously only one handle could be stored, but we will need to store multiple handles when we have filters. The plugin handle is defined as index 0. Filters will use indices > 0. Message-Id: <20180117205356.8699-6-rjones@redhat.com> [eblake: rework for FUA support] Signed-off-by: Eric Blake <eblake@redhat.com> --- src/connections.c | 37 ++++++++++++++++++++++++++++++------- src/internal.h | 4 ++-- src/plugins.c | 53 +++++++++++++++++++++++++++-------------------------- 3 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/connections.c b/src/connections.c index 7a4c7bb..e225b5c 100644 --- a/src/connections.c +++ b/src/connections.c @@ -69,10 +69,12 @@ struct connection { pthread_mutex_t write_lock; pthread_mutex_t status_lock; int status; /* 1 for more I/O with client, 0 for shutdown, -1 on error */ - void *handle; void *crypto_session; int nworkers; + void **handles; + size_t nr_handles; + uint64_t exportsize; int readonly; int can_flush; @@ -100,16 +102,37 @@ static void raw_close (struct connection *); /* Accessors for public fields in the connection structure. * Everything else is private to this file. 
*/ -void -connection_set_handle (struct connection *conn, void *handle) +int +connection_set_handle (struct connection *conn, size_t i, void *handle) { - conn->handle = handle; + size_t j; + + if (i < conn->nr_handles) + conn->handles[i] = handle; + else { + j = conn->nr_handles; + conn->nr_handles = i+1; + conn->handles = realloc (conn->handles, + conn->nr_handles * sizeof (void *)); + if (conn->handles == NULL) { + perror ("realloc"); + conn->nr_handles = 0; + return -1; + } + for (; j < i; ++j) + conn->handles[j] = NULL; + conn->handles[i] = handle; + } + return 0; } void * -connection_get_handle (struct connection *conn) +connection_get_handle (struct connection *conn, size_t i) { - return conn->handle; + if (i < conn->nr_handles) + return conn->handles[i]; + else + return NULL; } pthread_mutex_t * @@ -341,7 +364,7 @@ free_connection (struct connection *conn) * callback should always be called. */ if (!quit) { - if (conn->handle) + if (conn->nr_handles > 0 && conn->handles[0]) backend->close (backend, conn); } diff --git a/src/internal.h b/src/internal.h index 9d01c2b..28b1aaf 100644 --- a/src/internal.h +++ b/src/internal.h @@ -134,8 +134,8 @@ typedef int (*connection_recv_function) (struct connection *, void *buf, size_t typedef int (*connection_send_function) (struct connection *, const void *buf, size_t len); typedef void (*connection_close_function) (struct connection *); extern int handle_single_connection (int sockin, int sockout); -extern void connection_set_handle (struct connection *conn, void *handle); -extern void *connection_get_handle (struct connection *conn); +extern int connection_set_handle (struct connection *conn, size_t i, void *handle); +extern void *connection_get_handle (struct connection *conn, size_t i); extern pthread_mutex_t *connection_get_request_lock (struct connection *conn); extern void connection_set_crypto_session (struct connection *conn, void *session); extern void *connection_get_crypto_session (struct connection *conn); 
diff --git a/src/plugins.c b/src/plugins.c index 4442a50..137bae3 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -238,7 +238,7 @@ plugin_open (struct backend *b, struct connection *conn, int readonly) struct backend_plugin *p = container_of (b, struct backend_plugin, backend); void *handle; - assert (connection_get_handle (conn) == NULL); + assert (connection_get_handle (conn, 0) == NULL); assert (p->plugin.open != NULL); debug ("%s: open readonly=%d", p->filename, readonly); @@ -247,7 +247,7 @@ plugin_open (struct backend *b, struct connection *conn, int readonly) if (!handle) return -1; - connection_set_handle (conn, handle); + connection_set_handle (conn, 0, handle); return 0; } @@ -256,14 +256,14 @@ plugin_close (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); debug ("close"); if (p->plugin.close) - p->plugin.close (connection_get_handle (conn)); + p->plugin.close (connection_get_handle (conn, 0)); - connection_set_handle (conn, NULL); + connection_set_handle (conn, 0, NULL); } static int64_t @@ -271,12 +271,12 @@ plugin_get_size (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (p->plugin.get_size != NULL); debug ("get_size"); - return p->plugin.get_size (connection_get_handle (conn)); + return p->plugin.get_size (connection_get_handle (conn, 0)); } static int @@ -284,12 +284,12 @@ plugin_can_write (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); debug ("can_write"); if (p->plugin.can_write) - return p->plugin.can_write (connection_get_handle (conn)); + return p->plugin.can_write 
(connection_get_handle (conn, 0)); else return p->plugin.pwrite != NULL; } @@ -299,12 +299,12 @@ plugin_can_flush (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); debug ("can_flush"); if (p->plugin.can_flush) - return p->plugin.can_flush (connection_get_handle (conn)); + return p->plugin.can_flush (connection_get_handle (conn, 0)); else return p->plugin.flush != NULL; } @@ -314,12 +314,12 @@ plugin_is_rotational (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); debug ("is_rotational"); if (p->plugin.is_rotational) - return p->plugin.is_rotational (connection_get_handle (conn)); + return p->plugin.is_rotational (connection_get_handle (conn, 0)); else return 0; /* assume false */ } @@ -329,12 +329,12 @@ plugin_can_trim (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); debug ("can_trim"); if (p->plugin.can_trim) - return p->plugin.can_trim (connection_get_handle (conn)); + return p->plugin.can_trim (connection_get_handle (conn, 0)); else return p->plugin.trim != NULL; } @@ -345,13 +345,13 @@ plugin_pread (struct backend *b, struct connection *conn, { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (p->plugin.pread != NULL); assert (!flags); debug ("pread count=%" PRIu32 " offset=%" PRIu64, count, offset); - return p->plugin.pread (connection_get_handle (conn), buf, count, offset); + return p->plugin.pread (connection_get_handle (conn, 0), buf, count, offset); } static int @@ 
-359,13 +359,13 @@ plugin_flush (struct backend *b, struct connection *conn, uint32_t flags) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (!flags); debug ("flush"); if (p->plugin.flush != NULL) - return p->plugin.flush (connection_get_handle (conn)); + return p->plugin.flush (connection_get_handle (conn, 0)); else { errno = EINVAL; return -1; @@ -380,14 +380,15 @@ plugin_pwrite (struct backend *b, struct connection *conn, struct backend_plugin *p = container_of (b, struct backend_plugin, backend); bool fua = flags & NBDKIT_FLAG_FUA; - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (!(flags & ~NBDKIT_FLAG_FUA)); debug ("pwrite count=%" PRIu32 " offset=%" PRIu64 " fua=%d", count, offset, fua); if (p->plugin.pwrite != NULL) - r = p->plugin.pwrite (connection_get_handle (conn), buf, count, offset); + r = p->plugin.pwrite (connection_get_handle (conn, 0), + buf, count, offset); else { errno = EROFS; return -1; @@ -407,14 +408,14 @@ plugin_trim (struct backend *b, struct connection *conn, struct backend_plugin *p = container_of (b, struct backend_plugin, backend); bool fua = flags & NBDKIT_FLAG_FUA; - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (!(flags & ~NBDKIT_FLAG_FUA)); debug ("trim count=%" PRIu32 " offset=%" PRIu64 " fua=%d", count, offset, fua); if (p->plugin.trim != NULL) - r = p->plugin.trim (connection_get_handle (conn), count, offset); + r = p->plugin.trim (connection_get_handle (conn, 0), count, offset); else { errno = EINVAL; return -1; @@ -438,7 +439,7 @@ plugin_zero (struct backend *b, struct connection *conn, int may_trim = (flags & NBDKIT_FLAG_MAY_TRIM) != 0; bool fua = flags & NBDKIT_FLAG_FUA; - assert (connection_get_handle (conn)); + assert (connection_get_handle (conn, 0)); assert (!(flags & ~(NBDKIT_FLAG_MAY_TRIM | NBDKIT_FLAG_FUA))); 
debug ("zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d fua=%d", @@ -448,7 +449,7 @@ plugin_zero (struct backend *b, struct connection *conn, return 0; if (p->plugin.zero) { errno = 0; - result = p->plugin.zero (connection_get_handle (conn), + result = p->plugin.zero (connection_get_handle (conn, 0), count, offset, may_trim); if (result == -1) { err = threadlocal_get_error (); @@ -469,7 +470,7 @@ plugin_zero (struct backend *b, struct connection *conn, } while (count) { - result = p->plugin.pwrite (connection_get_handle (conn), + result = p->plugin.pwrite (connection_get_handle (conn, 0), buf, limit, offset); if (result < 0) break; -- 2.14.3
From: "Richard W.M. Jones" <rjones@redhat.com> Filters can be placed in front of plugins to modify their behaviour. This commit introduces the <nbdkit-filter.h> header file, the manual page, the ‘filterdir’ directory (like ‘plugindir’), and the ‘filters/’ source directory which will contain both example and real filters. Message-Id: <20180117205356.8699-7-rjones@redhat.com> [eblake: update for FUA flag support] Signed-off-by: Eric Blake <eblake@redhat.com> --- Makefile.am | 2 +- TODO | 17 +- configure.ac | 3 +- docs/Makefile.am | 9 +- docs/nbdkit-filter.pod | 528 ++++++++++++++++++++++++++++++++++++++++++++++++ docs/nbdkit-plugin.pod | 3 +- docs/nbdkit.pod | 3 +- filters/Makefile.am | 33 +++ include/Makefile.am | 4 +- include/nbdkit-filter.h | 147 ++++++++++++++ include/nbdkit-plugin.h | 2 + src/Makefile.am | 5 +- src/main.c | 1 + src/nbdkit.pc.in | 1 + 14 files changed, 736 insertions(+), 22 deletions(-) create mode 100644 docs/nbdkit-filter.pod create mode 100644 filters/Makefile.am create mode 100644 include/nbdkit-filter.h diff --git a/Makefile.am b/Makefile.am index f3c88b0..9c5b4c3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -49,7 +49,7 @@ SUBDIRS = \ src if HAVE_PLUGINS -SUBDIRS += plugins +SUBDIRS += plugins filters endif SUBDIRS += tests diff --git a/TODO b/TODO index 0c027e2..0955db7 100644 --- a/TODO +++ b/TODO @@ -34,10 +34,8 @@ nbdkit there is no compelling reason unless the result is better than qemu-nbd. For the majority of users it would be better if they were directed to qemu-nbd for these use cases. -Filters -------- - -It should be possible to layer filters over plugins to do things like: +Suggestions for filters +----------------------- * adding artificial delays (see wdelay/rdelay options in the file plugin) @@ -50,17 +48,6 @@ It should be possible to layer filters over plugins to do things like: * export a single partition (like qemu-nbd -P) -A possible syntax would be: - - nbdkit --filter=delay [--filter=...] 
file file=foo wdelay=10 - -The filter(s) intercept all plugin calls and can either return, return -an error, or pass the call down to the next layer in the stack (and -eventually to the plugin). By intercepting the .config call the -filter can process its own parameters from the command line (wdelay=10 -in the example above), and by intercepting the .pread, .pwrite methods -the filter could inject the delaying behaviour. - Composing nbdkit ---------------- diff --git a/configure.ac b/configure.ac index a2950f6..7032614 100644 --- a/configure.ac +++ b/configure.ac @@ -181,7 +181,7 @@ AS_IF([test "x$POD2MAN" != "xno"],[ AM_CONDITIONAL([HAVE_POD2MAN], [test "x$POD2MAN" != "xno"]) AC_ARG_ENABLE([plugins], - [AS_HELP_STRING([--disable-plugins], [disable all bundled plugins])]) + [AS_HELP_STRING([--disable-plugins], [disable all bundled plugins and filters])]) AM_CONDITIONAL([HAVE_PLUGINS], [test "x$enable_plugins" != "xno"]) dnl Check for Perl, for embedding in the perl plugin. @@ -512,6 +512,7 @@ AC_CONFIG_FILES([Makefile plugins/tar/Makefile plugins/vddk/Makefile plugins/xz/Makefile + filters/Makefile src/Makefile src/nbdkit.pc tests/Makefile]) diff --git a/docs/Makefile.am b/docs/Makefile.am index 323f48d..d2330fb 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -33,6 +33,7 @@ EXTRA_DIST = \ nbdkit.pod \ nbdkit-plugin.pod + nbdkit-filter.pod CLEANFILES = *~ @@ -40,7 +41,8 @@ if HAVE_POD2MAN man_MANS = \ nbdkit.1 \ - nbdkit-plugin.3 + nbdkit-plugin.3 \ + nbdkit-filter.3 CLEANFILES += $(man_MANS) nbdkit.1: nbdkit.pod @@ -53,4 +55,9 @@ nbdkit-plugin.3: nbdkit-plugin.pod if grep 'POD ERROR' $@.t; then rm $@.t; exit 1; fi && \ mv $@.t $@ +nbdkit-filter.3: nbdkit-filter.pod + $(POD2MAN) $(POD2MAN_ARGS) --section=3 --name=nbdkit-filter $< $@.t && \ + if grep 'POD ERROR' $@.t; then rm $@.t; exit 1; fi && \ + mv $@.t $@ + endif diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod new file mode 100644 index 0000000..330b8a4 --- /dev/null +++ 
b/docs/nbdkit-filter.pod @@ -0,0 +1,528 @@ +=encoding utf8 + +=head1 NAME + +nbdkit-filter - How to write nbdkit filters + +=head1 SYNOPSIS + + #include <nbdkit-filter.h> + + static int + myfilter_config (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value) + { + if (strcmp (key, "myparameter") == 0) { + // ... + return 0; + } + else { + // pass through to next filter or plugin + return next (nxdata, key, value); + } + } + + static struct nbdkit_filter filter = { + .name = "filter", + .config = myfilter_config, + /* etc */ + }; + + NBDKIT_REGISTER_FILTER(filter) + +When this has been compiled to a shared library, do: + + nbdkit [--args ...] --filter=./myfilter.so plugin [key=value ...] + +When debugging, use the I<-fv> options: + + nbdkit -fv --filter=./myfilter.so plugin [key=value ...] + +=head1 DESCRIPTION + +One or more nbdkit filters can be placed in front of an nbdkit plugin +to modify the behaviour of the plugin. This manual page describes how +to create an nbdkit filter. + +Filters can be used for example to limit requests to an offset/limit, +add copy-on-write support, or inject delays or errors (for testing). + +Different filters can be stacked: + + NBD ┌─────────┐ ┌─────────┐ ┌────────┐ + client ───▶│ filter1 │───▶│ filter2 │── ─ ─ ──▶│ plugin │ + request └─────────┘ └─────────┘ └────────┘ + +Each filter intercepts plugin functions (see L<nbdkit-plugin(3)>) and +can call the next filter or plugin in the chain, modifying parameters, +calling before the filter function, in the middle or after. Filters +may even short-cut the chain. As an example, to process its own +parameters the filter can intercept the C<.config> method: + + static int + myfilter_config (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value) + { + if (strcmp (key, "myparameter") == 0) { + // ... + // here you would handle this key, value + // ... 
+ return 0; + } + else { + // pass through to next filter or plugin + return next (nxdata, key, value); + } + } + + static struct nbdkit_filter filter = { + // ... + .config = myfilter_config, + // ... + }; + +The call to C<next (nxdata, ...)> calls the C<.config> method of the +next filter or plugin in the chain. In the example above any +instances of C<myparameter=...> on the command line would not be seen +by the plugin. + +To see example filters, take a look at the source of nbdkit, in the +C<filters> directory. + +Filters must be written in C, must be fully thread safe, and have +tighter rules regarding what callbacks may do. While there is a +guarantee that plugins written against an older version of nbdkit will +still work with newer versions, filters do not have the same stability +guarantee, and nbdkit may refuse to use a filter that was compiled +against a different version rather than risk misbehavior. + +=head1 C<nbdkit-filter.h> + +All filters should start by including this header file: + + #include <nbdkit-filter.h> + +=head1 C<struct nbdkit_filter> + +All filters must define and register one C<struct nbdkit_filter>, +which contains the name of the filter and pointers to plugin methods +that the filter wants to intercept. + + static struct nbdkit_filter filter = { + .name = "filter", + .longname = "My Filter", + .description = "This is my great filter for nbdkit", + .config = myfilter_config, + /* etc */ + }; + + NBDKIT_REGISTER_FILTER(filter) + +The C<.name> field is the name of the filter. This is the only field +which is required. + +=head1 NEXT PLUGIN + +F<nbdkit-filter.h> defines two function types (C<nbdkit_next_config>, +C<nbdkit_next_config_complete>) and a structure called C<struct +nbdkit_next>. These abstract the next plugin or filter in the chain. +There is also an opaque pointer C<nxdata> which must be passed along +when calling these functions. 
+ +The filter’s C<.config> and C<.config_complete> methods may only call +the next C<.config> or C<.config_complete> method in the chain +(optionally). + +The filter’s C<.open> and C<.close> methods are called when a new +connection is opened or an old connection closed, and these have no +C<next> parameter because they cannot be short-circuited. + +The filter’s other methods like C<.get_size>, C<.pread> etc ― always +called in the context of a connection ― are passed a pointer to +C<struct nbdkit_next> which contains a subset of the plugin methods +that can be called during a connection. It is possible for a filter +to issue (for example) extra read calls in response to a single +C<.pwrite> call. + +You can modify parameters when you call the C<next> function. However +be careful when modifying strings because for some methods +(eg. C<.config>) the plugin may save the string pointer that you pass +along. So you may have to ensure that the string is not freed for the +lifetime of the server. + +Note that if your filter registers a callback but in that callback it +doesn't call the C<next> function then the corresponding method in the +plugin will never be called. + +=head1 CALLBACKS + +C<struct nbdkit_filter> has some static fields describing the filter +and optional callback functions which can be used to intercept plugin +methods. + +=head2 C<.name> + + const char *name; + +This field (a string) is required, and B<must> contain only ASCII +alphanumeric characters and be unique amongst all filters. + +=head2 C<.version> + + const char *version; + +Filters may optionally set a version string which is displayed in help +and debugging output. + +=head2 C<.longname> + + const char *longname; + +An optional free text name of the filter. This field is used in error +messages. + +=head2 C<.description> + + const char *description; + +An optional multi-line description of the filter. 
+ +=head2 C<.load> + + void load (void); + +This is called once just after the filter is loaded into memory. You +can use this to perform any global initialization needed by the +filter. + +=head2 C<.unload> + + void unload (void); + +This may be called once just before the filter is unloaded from +memory. Note that it's not guaranteed that C<.unload> will always be +called (eg. the server might be killed or segfault), so you should try +to make the filter as robust as possible by not requiring cleanup. +See also L<nbdkit-plugin(3)/SHUTDOWN>. + +=head2 C<.config> + + int (*config) (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value); + +This intercepts the plugin C<.config> method and can be used by the +filter to parse its own command line parameters. You should try to +make sure that command line parameter keys that the filter uses do not +conflict with ones that could be used by a plugin. + +If there is an error, C<.config> should call C<nbdkit_error> with an +error message and return C<-1>. + +=head2 C<.config_complete> + + int (*config_complete) (nbdkit_next_config_complete *next, void *nxdata); + +This intercepts the plugin C<.config_complete> method and can be used +to ensure that all parameters needed by the filter were supplied on +the command line. + +If there is an error, C<.config_complete> should call C<nbdkit_error> +with an error message and return C<-1>. + +=head2 C<.config_help> + + const char *config_help; + +This optional multi-line help message should summarize any +C<key=value> parameters that it takes. It does I<not> need to repeat +what already appears in C<.description>. + +If the filter doesn't take any config parameters you should probably +omit this. + +=head2 C<.open> + + void * (*open) (int readonly); + +This is called when a new client connection is opened and can be used +to allocate any per-connection data structures needed by the filter. 
+The handle (which is not the same as the plugin handle) is passed back +to other filter callbacks and could be freed in the C<.close> +callback. + +Note that the handle is completely opaque to nbdkit, but it must not +be NULL. + +If there is an error, C<.open> should call C<nbdkit_error> with an +error message and return C<NULL>. + +=head2 C<.close> + + void (*close) (void *handle); + +This is called when the client closes the connection. It should clean +up any per-connection resources used by the filter. + +=head2 C<.get_size> + + int64_t (*get_size) (struct nbdkit_next *next, void *nxdata, + void *handle); + +This intercepts the plugin C<.get_size> method and can be used to read +or modify the apparent size of the block device that the NBD client +will see. + +The returned size must be E<ge> 0. If there is an error, C<.get_size> +should call C<nbdkit_error> with an error message and return C<-1>. + +=head2 C<.can_write> + +=head2 C<.can_flush> + +=head2 C<.is_rotational> + +=head2 C<.can_trim> + + int (*can_write) (struct nbdkit_next *next, void *nxdata, + void *handle); + int (*can_flush) (struct nbdkit_next *next, void *nxdata, + void *handle); + int (*is_rotational) (struct nbdkit_next *next, + void *nxdata, + void *handle); + int (*can_trim) (struct nbdkit_next *next, void *nxdata, + void *handle); + +These intercept the corresponding plugin methods. + +If there is an error, the callback should call C<nbdkit_error> with an +error message and return C<-1>. + +=head2 C<.pread> + + int (*pread) (struct nbdkit_next *next, void *nxdata, + void *handle, void *buf, uint32_t count, uint64_t offset, + uint32_t flags); + +This intercepts the plugin C<.pread> method and can be used to read or +modify data read by the plugin. + +At this time, flags will be 0 on input, and the filter should not pass +any flags to C<next-E<gt>pread>.
+ +If there is an error (including a short read which couldn't be +recovered from), C<.pread> should call C<nbdkit_error> with an error +message B<and> set C<errno>, then return C<-1>. + +=head2 C<.pwrite> + + int (*pwrite) (struct nbdkit_next *next, void *nxdata, + void *handle, + const void *buf, uint32_t count, uint64_t offset, + uint32_t flags); + +This intercepts the plugin C<.pwrite> method and can be used to modify +data written by the plugin. + +At this time, flags may include C<NBDKIT_FLAG_FUA> on input based on +the result of C<.can_flush>. In turn, the filter may only pass +C<NBDKIT_FLAG_FUA> on to C<next-E<gt>pwrite> if C<next-E<gt>can_flush> +returned true. + +This function will not be called if C<.can_write> returned false; in +turn, the filter should not call C<next-E<gt>pwrite> if C<next-E<gt>can_write> +did not return true. + +If there is an error (including a short write which couldn't be +recovered from), C<.pwrite> should call C<nbdkit_error> with an error +message B<and> set C<errno>, then return C<-1>. + +=head2 C<.flush> + + int (*flush) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t flags); + +This intercepts the plugin C<.flush> method and can be used to modify +flush requests. + +At this time, flags will be 0 on input, and the filter should not pass +any flags to C<next-E<gt>flush>. + +This function will not be called if C<.can_flush> returned false; in +turn, the filter should not call C<next-E<gt>flush> if C<next-E<gt>can_flush> +did not return true. + +If there is an error, C<.flush> should call C<nbdkit_error> with an +error message B<and> set C<errno>, then return C<-1>. + +=head2 C<.trim> + + int (*trim) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offset, uint32_t flags); + +This intercepts the plugin C<.trim> method and can be used to modify +trim requests. + +At this time, flags may include C<NBDKIT_FLAG_FUA> on input based on +the result of C<.can_flush>.
In turn, the filter may only pass +C<NBDKIT_FLAG_FUA> on to C<next-E<gt>trim> if C<next-E<gt>can_flush> +returned true. + +This function will not be called if C<.can_trim> returned false; in +turn, the filter should not call C<next-E<gt>trim> if C<next-E<gt>can_trim> +did not return true. + +If there is an error, C<.trim> should call C<nbdkit_error> with an +error message B<and> set C<errno>, then return C<-1>. + +=head2 C<.zero> + + int (*zero) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offset, + uint32_t flags); + +This intercepts the plugin C<.zero> method and can be used to modify +zero requests. + +At this time, flags may include C<NBDKIT_FLAG_MAY_TRIM> +unconditionally, and C<NBDKIT_FLAG_FUA> based on the result of +C<.can_flush>. In turn, when calling C<next-E<gt>zero>, the filter may +pass C<NBDKIT_FLAG_MAY_TRIM> unconditionally, but may only pass +C<NBDKIT_FLAG_FUA> if C<next-E<gt>can_flush> returned true. + +This function will not be called if C<.can_write> returned false; in +turn, the filter should not call C<next-E<gt>zero> if C<next-E<gt>can_write> +did not return true. + +If there is an error, C<.zero> should call C<nbdkit_error> with an +error message B<and> set C<errno>, then return C<-1>; however, +unlike plugins, this function must not return the C<EOPNOTSUPP> +error (the code guarantees that C<next-E<gt>zero> will have already +done a fallback to C<next-E<gt>pwrite> rather than fail with that +particular error, and the fallback must not be performed more +than once). + +=head1 THREADS + +Because filters can be mixed and used with any plugin and thus any +threading model supported by L<nbdkit-plugin(3)>, filters must be +thread safe. They must be able to handle concurrent requests even on +the same handle. + +Filters may have to use pthread primitives like mutexes to achieve +this.
+ +=head1 DEBUGGING + +Run the server with I<-f> and I<-v> options so it doesn't fork and you +can see debugging information: + + nbdkit -fv --filter=./myfilter.so plugin [key=value [key=value [...]]] + +To print debugging information from within the filter, call +C<nbdkit_debug>, which has the following prototype and works like +L<printf(3)>: + + void nbdkit_debug (const char *fs, ...); + void nbdkit_vdebug (const char *fs, va_list args); + +For convenience, C<nbdkit_debug> preserves the value of C<errno>. +Note that C<nbdkit_debug> only prints things when the server is in +verbose mode (I<-v> option). + +=head1 INSTALLING THE FILTER + +The filter is a C<*.so> file and possibly a manual page. You can of +course install the filter C<*.so> file wherever you want, and users +will be able to use it by running: + + nbdkit --filter=/path/to/filter.so plugin [args] + +However B<if> the shared library has a name of the form +C<nbdkit-I<name>-filter.so> B<and if> the library is installed in the +C<$filterdir> directory, then users can run it by only typing: + + nbdkit --filter=name plugin [args] + +The location of the C<$filterdir> directory is set when nbdkit is +compiled and can be found by doing: + + nbdkit --dump-config + +If using the pkg-config/pkgconf system then you can also find the +filter directory at compile time by doing: + + pkgconf nbdkit --variable=filterdir + +=head1 PKG-CONFIG/PKGCONF + +nbdkit provides a pkg-config/pkgconf file called C<nbdkit.pc> which +should be installed on the correct path when the nbdkit development +environment is installed. You can use this in autoconf +F<configure.ac> scripts to test for the development environment: + + PKG_CHECK_MODULES([NBDKIT], [nbdkit >= 1.2.3]) + +The above will fail unless nbdkit E<ge> 1.2.3 and the header file is +installed, and will set C<NBDKIT_CFLAGS> and C<NBDKIT_LIBS> +appropriately for compiling filters. + +You can also run pkg-config/pkgconf directly, for example: + + if !
pkgconf nbdkit --exists; then + echo "you must install the nbdkit development environment" + exit 1 + fi + +=head1 SEE ALSO + +L<nbdkit(1)>, +L<nbdkit-plugin(3)>. + +=head1 AUTHORS + +Richard W.M. Jones + +=head1 COPYRIGHT + +Copyright (C) 2013-2018 Red Hat Inc. + +=head1 LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +=over 4 + +=item * + +Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +=item * + +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +=item * + +Neither the name of Red Hat nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +=back + +THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE.
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod index 9abf75f..3cafc42 100644 --- a/docs/nbdkit-plugin.pod +++ b/docs/nbdkit-plugin.pod @@ -692,6 +692,7 @@ and then users will be able to run it like this: =head1 SEE ALSO L<nbdkit(1)>, +L<nbdkit-filter(3)>, L<nbdkit-example1-plugin(1)>, L<nbdkit-example2-plugin(1)>, L<nbdkit-example3-plugin(1)>, @@ -711,7 +712,7 @@ Pino Toscano =head1 COPYRIGHT -Copyright (C) 2013-2017 Red Hat Inc. +Copyright (C) 2013-2018 Red Hat Inc. =head1 LICENSE diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod index 1687ac9..3b37db8 100644 --- a/docs/nbdkit.pod +++ b/docs/nbdkit.pod @@ -856,6 +856,7 @@ L</SOCKET ACTIVATION>. Other nbdkit manual pages: L<nbdkit-plugin(3)>, +L<nbdkit-filter(3)>, L<nbdkit-curl-plugin(1)>, L<nbdkit-example1-plugin(1)>, L<nbdkit-example2-plugin(1)>, @@ -895,7 +896,7 @@ Pino Toscano =head1 COPYRIGHT -Copyright (C) 2013-2017 Red Hat Inc. +Copyright (C) 2013-2018 Red Hat Inc. =head1 LICENSE diff --git a/filters/Makefile.am b/filters/Makefile.am new file mode 100644 index 0000000..ed1580b --- /dev/null +++ b/filters/Makefile.am @@ -0,0 +1,33 @@ +# nbdkit +# Copyright (C) 2013-2018 Red Hat Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +#SUBDIRS diff --git a/include/Makefile.am b/include/Makefile.am index 7d54215..deccc6b 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -30,4 +30,6 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. -include_HEADERS = nbdkit-plugin.h +include_HEADERS = \ + nbdkit-plugin.h \ + nbdkit-filter.h diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h new file mode 100644 index 0000000..27d2b2c --- /dev/null +++ b/include/nbdkit-filter.h @@ -0,0 +1,147 @@ +/* nbdkit + * Copyright (C) 2013-2018 Red Hat Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* See nbdkit-filter(3) for documentation and how to write a filter. */ + +#ifndef NBDKIT_FILTER_H +#define NBDKIT_FILTER_H + +/* This header also defines some useful functions like nbdkit_debug + * and nbdkit_parse_size which are appropriate for filters to use. 
+ */ +#include <nbdkit-plugin.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define NBDKIT_FILTER_API_VERSION 1 + +typedef int nbdkit_next_config (void *nxdata, + const char *key, const char *value); +typedef int nbdkit_next_config_complete (void *nxdata); + +struct nbdkit_next { + int64_t (*get_size) (void *nxdata); + + int (*can_write) (void *nxdata); + int (*can_flush) (void *nxdata); + int (*is_rotational) (void *nxdata); + int (*can_trim) (void *nxdata); + + int (*pread) (void *nxdata, void *buf, uint32_t count, uint64_t offset, + uint32_t flags); + int (*pwrite) (void *nxdata, const void *buf, uint32_t count, + uint64_t offset, uint32_t flags); + int (*flush) (void *nxdata, uint32_t flags); + int (*trim) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags); + int (*zero) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags); +}; + +struct nbdkit_filter { + /* Do not set these fields directly; use NBDKIT_REGISTER_FILTER. + * They exist so that we can recognize filters compiled against + * one version of the header with a runtime compiled against a + * different version with more (or fewer) fields. + */ + uint64_t _struct_size; + int _api_version; + + /* New fields will only be added at the end of the struct. 
*/ + const char *name; + const char *longname; + const char *version; + const char *description; + + void (*load) (void); + void (*unload) (void); + + int (*config) (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value); + int (*config_complete) (nbdkit_next_config_complete *next, void *nxdata); + const char *config_help; + + void * (*open) (int readonly); + void (*close) (void *handle); + + int64_t (*get_size) (struct nbdkit_next *next, void *nxdata, + void *handle); + + int (*can_write) (struct nbdkit_next *next, void *nxdata, + void *handle); + int (*can_flush) (struct nbdkit_next *next, void *nxdata, + void *handle); + int (*is_rotational) (struct nbdkit_next *next, + void *nxdata, + void *handle); + int (*can_trim) (struct nbdkit_next *next, void *nxdata, + void *handle); + + int (*pread) (struct nbdkit_next *next, void *nxdata, + void *handle, void *buf, uint32_t count, uint64_t offset, + uint32_t flags); + int (*pwrite) (struct nbdkit_next *next, void *nxdata, + void *handle, + const void *buf, uint32_t count, uint64_t offset, + uint32_t flags); + int (*flush) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t flags); + int (*trim) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offset, uint32_t flags); + int (*zero) (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offset, uint32_t flags); +}; + +#ifndef NBDKIT_CXX_LANG_C +#ifdef __cplusplus +#define NBDKIT_CXX_LANG_C extern "C" +#else +#define NBDKIT_CXX_LANG_C /* nothing */ +#endif +#endif + +#define NBDKIT_REGISTER_FILTER(filter) \ + NBDKIT_CXX_LANG_C \ + struct nbdkit_filter * \ + filter_init (void) \ + { \ + (filter)._struct_size = sizeof (filter); \ + (filter)._api_version = NBDKIT_API_VERSION; \ + return &(filter); \ + } + +#ifdef __cplusplus +} +#endif + +#endif /* NBDKIT_FILTER_H */ diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h index 2ec3b15..13541e5 100644 --- 
a/include/nbdkit-plugin.h +++ b/include/nbdkit-plugin.h @@ -111,11 +111,13 @@ extern char *nbdkit_absolute_path (const char *path); extern int64_t nbdkit_parse_size (const char *str); extern int nbdkit_read_password (const char *value, char **password); +#ifndef NBDKIT_CXX_LANG_C #ifdef __cplusplus #define NBDKIT_CXX_LANG_C extern "C" #else #define NBDKIT_CXX_LANG_C /* nothing */ #endif +#endif #define NBDKIT_REGISTER_PLUGIN(plugin) \ NBDKIT_CXX_LANG_C \ diff --git a/src/Makefile.am b/src/Makefile.am index 1f05eab..6033fe5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -31,6 +31,7 @@ # SUCH DAMAGE. plugindir = $(libdir)/nbdkit/plugins +filterdir = $(libdir)/nbdkit/filters sbin_PROGRAMS = nbdkit @@ -47,13 +48,15 @@ nbdkit_SOURCES = \ sockets.c \ threadlocal.c \ utils.c \ - $(top_srcdir)/include/nbdkit-plugin.h + $(top_srcdir)/include/nbdkit-plugin.h \ + $(top_srcdir)/include/nbdkit-filter.h nbdkit_CPPFLAGS = \ -Dbindir=\"$(bindir)\" \ -Dlibdir=\"$(libdir)\" \ -Dmandir=\"$(mandir)\" \ -Dplugindir=\"$(plugindir)\" \ + -Dfilterdir=\"$(filterdir)\" \ -Dsbindir=\"$(sbindir)\" \ -Dsysconfdir=\"$(sysconfdir)\" \ -I$(top_srcdir)/include diff --git a/src/main.c b/src/main.c index b3e6bad..4790c46 100644 --- a/src/main.c +++ b/src/main.c @@ -179,6 +179,7 @@ dump_config (void) printf ("%s=%s\n", "mandir", mandir); printf ("%s=%s\n", "name", PACKAGE_NAME); printf ("%s=%s\n", "plugindir", plugindir); + printf ("%s=%s\n", "filterdir", filterdir); printf ("%s=%s\n", "root_tls_certificates_dir", root_tls_certificates_dir); printf ("%s=%s\n", "sbindir", sbindir); #ifdef HAVE_LIBSELINUX diff --git a/src/nbdkit.pc.in b/src/nbdkit.pc.in index cbb301d..fe8f511 100644 --- a/src/nbdkit.pc.in +++ b/src/nbdkit.pc.in @@ -3,6 +3,7 @@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ plugindir=@libdir@/nbdkit/plugins +filterdir=@libdir@/nbdkit/filters Name: @PACKAGE_NAME@ Version: @PACKAGE_VERSION@ -- 2.14.3
From: "Richard W.M. Jones" <rjones@redhat.com> Also implements the --filters parameter. Message-Id: <20180117205356.8699-8-rjones@redhat.com> [eblake: update for FUA flag support] Signed-off-by: Eric Blake <eblake@redhat.com> --- docs/nbdkit.pod | 21 +- nbdkit.in | 17 +- src/Makefile.am | 1 + src/filters.c | 613 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/internal.h | 21 +- src/main.c | 114 +++++++++-- src/plugins.c | 11 +- 7 files changed, 778 insertions(+), 20 deletions(-) create mode 100644 src/filters.c diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod index 3b37db8..636eedc 100644 --- a/docs/nbdkit.pod +++ b/docs/nbdkit.pod @@ -7,7 +7,7 @@ nbdkit - A toolkit for creating NBD servers =head1 SYNOPSIS nbdkit [-e EXPORTNAME] [--exit-with-parent] [-f] - [-g GROUP] [-i IPADDR] + [--filter=FILTER ...] [-g GROUP] [-i IPADDR] [--newstyle] [--oldstyle] [-P PIDFILE] [-p PORT] [-r] [--run CMD] [-s] [--selinux-label LABEL] [-t THREADS] [--tls=off|on|require] [--tls-certificates /path/to/certificates] @@ -119,6 +119,13 @@ not allowed with the oldstyle protocol. I<Don't> fork into the background. +=item B<--filter> FILTER + +Add a filter before the plugin. This option may be given one or more +times to stack filters in front of the plugin. They are processed in +the order they appear on the command line. See L</FILTERS> and +L<nbdkit-filter(3)>. + =item B<-g> GROUP =item B<--group> GROUP @@ -354,6 +361,18 @@ languages. The file should be executable. For example: (see L<nbdkit-perl-plugin(3)> for a full example). +=head1 FILTERS + +One or more filters can be placed in front of an nbdkit plugin to +modify the behaviour of the plugin, using the I<--filter> parameter. +Filters can be used for example to limit requests to an offset/limit, +add copy-on-write support, or inject delays or errors (for testing). + +Several existing filters are available in the C<$filterdir>. Use +C<nbdkit --dump-config> to find the directory name. 
+ +How to write filters is described in L<nbdkit-filter(3)>. + =head1 SOCKET ACTIVATION nbdkit supports socket activation (sometimes called systemd socket diff --git a/nbdkit.in b/nbdkit.in index 20bc9c0..d4fe4e0 100644 --- a/nbdkit.in +++ b/nbdkit.in @@ -1,7 +1,7 @@ #!/bin/bash - # @configure_input@ -# Copyright (C) 2017 Red Hat Inc. +# Copyright (C) 2017-2018 Red Hat Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -79,6 +79,21 @@ while [ $# -gt 0 ]; do shift ;; + # Filters can be rewritten if purely alphanumeric. + --filter) + args[$i]="--filter" + ((++i)) + if [[ "$2" =~ ^[a-zA-Z0-9]+$ ]]; then + if [ -x "$b/filters/$2/.libs/nbdkit-$2-filter.so" ]; then + args[$i]="$b/filters/$2/.libs/nbdkit-$2-filter.so" + else + args[$i]="$2" + fi + fi + ((++i)) + shift 2 + ;; + # Anything else can be rewritten if it's purely alphanumeric, # but there is only one module name so only rewrite once. *) diff --git a/src/Makefile.am b/src/Makefile.am index 6033fe5..ae16fde 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,6 +40,7 @@ nbdkit_SOURCES = \ connections.c \ crypto.c \ errors.c \ + filters.c \ internal.h \ locks.c \ main.c \ diff --git a/src/filters.c b/src/filters.c new file mode 100644 index 0000000..093221c --- /dev/null +++ b/src/filters.c @@ -0,0 +1,613 @@ +/* nbdkit + * Copyright (C) 2013-2018 Red Hat Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <inttypes.h> +#include <assert.h> +#include <errno.h> + +#include <dlfcn.h> + +#include "nbdkit-filter.h" +#include "internal.h" + +/* We extend the generic backend struct with extra fields relating + * to this filter. + */ +struct backend_filter { + struct backend backend; + char *filename; + void *dl; + struct nbdkit_filter filter; +}; + +/* Note this frees the whole chain. */ +static void +filter_free (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + f->backend.next->free (f->backend.next); + + /* Acquiring this lock prevents any filter callbacks from running + * simultaneously. 
+ */ + lock_unload (); + + debug ("%s: unload", f->filename); + if (f->filter.unload) + f->filter.unload (); + + dlclose (f->dl); + free (f->filename); + + unlock_unload (); + + free (f); +} + +/* These are actually passing through to the final plugin, hence + * the function names. + */ +static int +plugin_thread_model (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + return f->backend.next->thread_model (f->backend.next); +} + +static int +plugin_errno_is_preserved (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + return f->backend.next->errno_is_preserved (f->backend.next); +} + +static const char * +plugin_name (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + return f->backend.next->name (f->backend.next); +} + +static const char * +filter_name (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + return f->filter.name; +} + +static const char * +filter_version (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + return f->filter.version; +} + +static void +filter_usage (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + printf ("filter: %s", f->filter.name); + if (f->filter.longname) + printf (" (%s)", f->filter.longname); + printf ("\n"); + printf ("(%s)", f->filename); + if (f->filter.description) { + printf ("\n"); + printf ("%s\n", f->filter.description); + } + if (f->filter.config_help) { + printf ("\n"); + printf ("%s\n", f->filter.config_help); + } +} + +static void +filter_dump_fields (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + f->backend.next->dump_fields (f->backend.next); +} + +static int +next_config (void *nxdata, const char *key, const char *value) +{ + struct 
backend *b = nxdata; + b->config (b, key, value); + return 0; +} + +static void +filter_config (struct backend *b, const char *key, const char *value) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + debug ("%s: config key=%s, value=%s", + f->filename, key, value); + + if (f->filter.config) { + if (f->filter.config (next_config, f->backend.next, key, value) == -1) + exit (EXIT_FAILURE); + } + else + f->backend.next->config (f->backend.next, key, value); +} + +static int +next_config_complete (void *nxdata) +{ + struct backend *b = nxdata; + b->config_complete (b); + return 0; +} + +static void +filter_config_complete (struct backend *b) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + + debug ("%s: config_complete", f->filename); + + if (f->filter.config_complete) { + if (f->filter.config_complete (next_config_complete, f->backend.next) == -1) + exit (EXIT_FAILURE); + } + else + f->backend.next->config_complete (f->backend.next); +} + +static int +filter_open (struct backend *b, struct connection *conn, int readonly) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = NULL; + + debug ("%s: open readonly=%d", f->filename, readonly); + + if (f->filter.open) { + handle = f->filter.open (readonly); + if (handle == NULL) + return -1; + } + connection_set_handle (conn, f->backend.i, handle); + return f->backend.next->open (f->backend.next, conn, readonly); +} + +static void +filter_close (struct backend *b, struct connection *conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + + debug ("close"); + + if (f->filter.close) + f->filter.close (handle); + f->backend.next->close (f->backend.next, conn); +} + +/* The next_functions structure contains pointers to backend + * functions. 
However because these functions are all expecting a + * backend and a connection, we cannot call them directly, but must + * write some next_* functions that unpack the two parameters from a + * single ‘void *nxdata’ struct pointer (‘b_conn’). + */ + +/* Literally a backend + a connection pointer. This is the + * implementation of ‘void *nxdata’ in the filter API. + */ +struct b_conn { + struct backend *b; + struct connection *conn; +}; + +static int64_t +next_get_size (void *nxdata) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->get_size (b_conn->b, b_conn->conn); +} + +static int +next_can_write (void *nxdata) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->can_write (b_conn->b, b_conn->conn); +} + +static int +next_can_flush (void *nxdata) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->can_flush (b_conn->b, b_conn->conn); +} + +static int +next_is_rotational (void *nxdata) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->is_rotational (b_conn->b, b_conn->conn); +} + +static int +next_can_trim (void *nxdata) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->can_trim (b_conn->b, b_conn->conn); +} + +static int +next_pread (void *nxdata, void *buf, uint32_t count, uint64_t offset, + uint32_t flags) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->pread (b_conn->b, b_conn->conn, buf, count, offset, flags); +} + +static int +next_pwrite (void *nxdata, const void *buf, uint32_t count, uint64_t offset, + uint32_t flags) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->pwrite (b_conn->b, b_conn->conn, buf, count, offset, flags); +} + +static int +next_flush (void *nxdata, uint32_t flags) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->flush (b_conn->b, b_conn->conn, flags); +} + +static int +next_trim (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->trim (b_conn->b, b_conn->conn, count, offset, flags); +} + +static int +next_zero
(void *nxdata, uint32_t count, uint64_t offset, uint32_t flags) +{ + struct b_conn *b_conn = nxdata; + return b_conn->b->zero (b_conn->b, b_conn->conn, count, offset, flags); +} + +static struct nbdkit_next next_functions = { + .get_size = next_get_size, + .can_write = next_can_write, + .can_flush = next_can_flush, + .is_rotational = next_is_rotational, + .can_trim = next_can_trim, + .pread = next_pread, + .pwrite = next_pwrite, + .flush = next_flush, + .trim = next_trim, + .zero = next_zero, +}; + +static int64_t +filter_get_size (struct backend *b, struct connection *conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("get_size"); + + if (f->filter.get_size) + return f->filter.get_size (&next_functions, &nxdata, handle); + else + return f->backend.next->get_size (f->backend.next, conn); +} + +static int +filter_can_write (struct backend *b, struct connection *conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("can_write"); + + if (f->filter.can_write) + return f->filter.can_write (&next_functions, &nxdata, handle); + else + return f->backend.next->can_write (f->backend.next, conn); +} + +static int +filter_can_flush (struct backend *b, struct connection *conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("can_flush"); + + if (f->filter.can_flush) + return f->filter.can_flush (&next_functions, &nxdata, handle); + else + return f->backend.next->can_flush (f->backend.next, conn); +} + +static int +filter_is_rotational (struct backend *b, struct connection 
*conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("is_rotational"); + + if (f->filter.is_rotational) + return f->filter.is_rotational (&next_functions, &nxdata, handle); + else + return f->backend.next->is_rotational (f->backend.next, conn); +} + +static int +filter_can_trim (struct backend *b, struct connection *conn) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("can_trim"); + + if (f->filter.can_trim) + return f->filter.can_trim (&next_functions, &nxdata, handle); + else + return f->backend.next->can_trim (f->backend.next, conn); +} + +static int +filter_pread (struct backend *b, struct connection *conn, + void *buf, uint32_t count, uint64_t offset, uint32_t flags) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("pread count=%" PRIu32 " offset=%" PRIu64 " flags=0x%" PRIx32, + count, offset, flags); + + if (f->filter.pread) + return f->filter.pread (&next_functions, &nxdata, handle, + buf, count, offset, flags); + else + return f->backend.next->pread (f->backend.next, conn, + buf, count, offset, flags); +} + +static int +filter_pwrite (struct backend *b, struct connection *conn, + const void *buf, uint32_t count, uint64_t offset, + uint32_t flags) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("pwrite count=%" PRIu32 " offset=%" PRIu64 " flags=0x%" PRIx32, + count, offset, flags); + + 
if (f->filter.pwrite) + return f->filter.pwrite (&next_functions, &nxdata, handle, + buf, count, offset, flags); + else + return f->backend.next->pwrite (f->backend.next, conn, + buf, count, offset, flags); +} + +static int +filter_flush (struct backend *b, struct connection *conn, uint32_t flags) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("flush flags=0x%" PRIx32, flags); + + if (f->filter.flush) + return f->filter.flush (&next_functions, &nxdata, handle, flags); + else + return f->backend.next->flush (f->backend.next, conn, flags); +} + +static int +filter_trim (struct backend *b, struct connection *conn, + uint32_t count, uint64_t offset, uint32_t flags) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("trim count=%" PRIu32 " offset=%" PRIu64 " flags=0x%" PRIx32, + count, offset, flags); + + if (f->filter.trim) + return f->filter.trim (&next_functions, &nxdata, handle, count, offset, + flags); + else + return f->backend.next->trim (f->backend.next, conn, count, offset, flags); +} + +static int +filter_zero (struct backend *b, struct connection *conn, + uint32_t count, uint64_t offset, uint32_t flags) +{ + struct backend_filter *f = container_of (b, struct backend_filter, backend); + void *handle = connection_get_handle (conn, f->backend.i); + struct b_conn nxdata = { .b = f->backend.next, .conn = conn }; + + debug ("zero count=%" PRIu32 " offset=%" PRIu64 " flags=0x%" PRIx32, + count, offset, flags); + + if (f->filter.zero) + return f->filter.zero (&next_functions, &nxdata, handle, + count, offset, flags); + else + return f->backend.next->zero (f->backend.next, conn, + count, offset, flags); +} + +static struct backend 
filter_functions = { + .free = filter_free, + .thread_model = plugin_thread_model, + .name = filter_name, + .plugin_name = plugin_name, + .usage = filter_usage, + .version = filter_version, + .dump_fields = filter_dump_fields, + .config = filter_config, + .config_complete = filter_config_complete, + .errno_is_preserved = plugin_errno_is_preserved, + .open = filter_open, + .close = filter_close, + .get_size = filter_get_size, + .can_write = filter_can_write, + .can_flush = filter_can_flush, + .is_rotational = filter_is_rotational, + .can_trim = filter_can_trim, + .pread = filter_pread, + .pwrite = filter_pwrite, + .flush = filter_flush, + .trim = filter_trim, + .zero = filter_zero, +}; + +/* Register and load a filter. */ +struct backend * +filter_register (struct backend *next, size_t index, const char *filename, + void *dl, struct nbdkit_filter *(*filter_init) (void)) +{ + struct backend_filter *f; + const struct nbdkit_filter *filter; + size_t i, len, size; + + f = calloc (1, sizeof *f); + if (f == NULL) { + out_of_memory: + perror ("strdup"); + exit (EXIT_FAILURE); + } + + f->backend = filter_functions; + f->backend.next = next; + f->backend.i = index; + f->filename = strdup (filename); + if (f->filename == NULL) goto out_of_memory; + f->dl = dl; + + debug ("registering filter %s", f->filename); + + /* Call the initialization function which returns the address of the + * filter's own 'struct nbdkit_filter'. + */ + filter = filter_init (); + if (!filter) { + fprintf (stderr, "%s: %s: filter registration function failed\n", + program_name, f->filename); + exit (EXIT_FAILURE); + } + + /* Check for incompatible future versions. 
*/ + if (filter->_api_version != 1) { + fprintf (stderr, "%s: %s: filter is incompatible with this version of nbdkit (_api_version = %d)\n", + program_name, f->filename, filter->_api_version); + exit (EXIT_FAILURE); + } + + /* Since the filter might be much older than the current version of + * nbdkit, only copy up to the self-declared _struct_size of the + * filter and zero out the rest. If the filter is much newer then + * we'll only call the "old" fields. + */ + size = sizeof f->filter; /* our struct */ + memset (&f->filter, 0, size); + if (size > filter->_struct_size) + size = filter->_struct_size; + memcpy (&f->filter, filter, size); + + /* Only filter.name is required. */ + if (f->filter.name == NULL) { + fprintf (stderr, "%s: %s: filter must have a .name field\n", + program_name, f->filename); + exit (EXIT_FAILURE); + } + + len = strlen (f->filter.name); + if (len == 0) { + fprintf (stderr, "%s: %s: filter.name field must not be empty\n", + program_name, f->filename); + exit (EXIT_FAILURE); + } + for (i = 0; i < len; ++i) { + if (!((f->filter.name[i] >= '0' && f->filter.name[i] <= '9') || + (f->filter.name[i] >= 'a' && f->filter.name[i] <= 'z') || + (f->filter.name[i] >= 'A' && f->filter.name[i] <= 'Z'))) { + fprintf (stderr, "%s: %s: filter.name ('%s') field must contain only ASCII alphanumeric characters\n", + program_name, f->filename, f->filter.name); + exit (EXIT_FAILURE); + } + } + /* Copy the module's name into local storage, so that filter.name + * survives past unload. */ + if (!(f->filter.name = strdup (f->filter.name))) { + perror ("strdup"); + exit (EXIT_FAILURE); + } + + debug ("registered filter %s (name %s)", f->filename, f->filter.name); + + /* Call the on-load callback if it exists. 
*/ + debug ("%s: load", f->filename); + if (f->filter.load) + f->filter.load (); + + return (struct backend *) f; +} diff --git a/src/internal.h b/src/internal.h index 28b1aaf..7fd52a2 100644 --- a/src/internal.h +++ b/src/internal.h @@ -41,6 +41,7 @@ #include <pthread.h> #include "nbdkit-plugin.h" +#include "nbdkit-filter.h" #ifdef __APPLE__ #define UNIX_PATH_MAX 104 @@ -118,6 +119,7 @@ extern volatile int quit; extern int quit_fd; extern struct backend *backend; +#define for_each_backend(b) for (b = backend; b != NULL; b = b->next) /* cleanup.c */ extern void cleanup_free (void *ptr); @@ -152,8 +154,19 @@ extern int crypto_negotiate_tls (struct connection *conn, int sockin, int sockou /* errors.c */ #define debug nbdkit_debug -/* plugins.c */ struct backend { + /* Next filter or plugin in the chain. This is always NULL for + * plugins and never NULL for filters. + */ + struct backend *next; + + /* A unique index used to fetch the handle from the connections + * object. The plugin (last in the chain) has index 0, and the + * filters have index 1, 2, ... depending how "far" they are from + * the plugin. 
+ */ + size_t i; + void (*free) (struct backend *); int (*thread_model) (struct backend *); const char *(*name) (struct backend *); @@ -178,7 +191,11 @@ struct backend { int (*zero) (struct backend *, struct connection *conn, uint32_t count, uint64_t offset, uint32_t flags); }; -extern struct backend *plugin_register (const char *_filename, void *_dl, struct nbdkit_plugin *(*plugin_init) (void)); +/* plugins.c */ +extern struct backend *plugin_register (size_t index, const char *filename, void *dl, struct nbdkit_plugin *(*plugin_init) (void)); + +/* filters.c */ +extern struct backend *filter_register (struct backend *next, size_t index, const char *filename, void *dl, struct nbdkit_filter *(*filter_init) (void)); /* locks.c */ extern void lock_connection (void); diff --git a/src/main.c b/src/main.c index 4790c46..38691c9 100644 --- a/src/main.c +++ b/src/main.c @@ -64,7 +64,8 @@ static int is_short_name (const char *); static char *make_random_fifo (void); -static struct backend *open_plugin_so (const char *filename, int short_name); +static struct backend *open_plugin_so (size_t i, const char *filename, int short_name); +static struct backend *open_filter_so (struct backend *next, size_t i, const char *filename, int short_name); static void start_serving (void); static void set_up_signals (void); static void run_command (void); @@ -120,6 +121,7 @@ static const struct option long_options[] = { { "export", 1, NULL, 'e' }, { "export-name",1, NULL, 'e' }, { "exportname", 1, NULL, 'e' }, + { "filter", 1, NULL, 0 }, { "foreground", 0, NULL, 'f' }, { "no-fork", 0, NULL, 'f' }, { "group", 1, NULL, 'g' }, @@ -154,7 +156,7 @@ usage (void) { printf ("nbdkit [--dump-config] [--dump-plugin]\n" " [-e EXPORTNAME] [--exit-with-parent] [-f]\n" - " [-g GROUP] [-i IPADDR]\n" + " [--filter=FILTER ...] 
[-g GROUP] [-i IPADDR]\n" " [--newstyle] [--oldstyle] [-P PIDFILE] [-p PORT] [-r]\n" " [--run CMD] [-s] [--selinux-label LABEL] [-t THREADS]\n" " [--tls=off|on|require] [--tls-certificates /path/to/certificates]\n" @@ -206,6 +208,11 @@ main (int argc, char *argv[]) int short_name; const char *filename; char *p; + static struct filter_filename { + struct filter_filename *next; + const char *filename; + } *filter_filenames = NULL; + size_t i; threadlocal_init (); @@ -245,6 +252,18 @@ main (int argc, char *argv[]) exit (EXIT_FAILURE); #endif } + else if (strcmp (long_options[option_index].name, "filter") == 0) { + struct filter_filename *t; + + t = malloc (sizeof *t); + if (t == NULL) { + perror ("malloc"); + exit (EXIT_FAILURE); + } + t->next = filter_filenames; + t->filename = optarg; + filter_filenames = t; + } else if (strcmp (long_options[option_index].name, "run") == 0) { if (socket_activation) { fprintf (stderr, "%s: cannot use socket activation with --run flag\n", @@ -497,23 +516,46 @@ main (int argc, char *argv[]) } } - backend = open_plugin_so (filename, short_name); + /* Open the plugin (first) and then wrap the plugin with the + * filters. The filters are wrapped in reverse order that they + * appear on the command line so that in the end ‘backend’ points to + * the first filter on the command line. 
+ */ + backend = open_plugin_so (0, filename, short_name); + i = 1; + while (filter_filenames) { + struct filter_filename *t = filter_filenames; + const char *filename = t->filename; + int short_name = is_short_name (filename); + + backend = open_filter_so (backend, i++, filename, short_name); + + filter_filenames = t->next; + free (t); + } if (help) { + struct backend *b; + usage (); - printf ("\n%s:\n\n", filename); - backend->usage (backend); + for_each_backend (b) { + printf ("\n"); + b->usage (b); + } exit (EXIT_SUCCESS); } if (version) { const char *v; + struct backend *b; display_version (); - printf ("%s", backend->name (backend)); - if ((v = backend->version (backend)) != NULL) - printf (" %s", v); - printf ("\n"); + for_each_backend (b) { + printf ("%s", b->name (b)); + if ((v = b->version (b)) != NULL) + printf (" %s", v); + printf ("\n"); + } exit (EXIT_SUCCESS); } @@ -575,7 +617,7 @@ main (int argc, char *argv[]) exit (EXIT_SUCCESS); } -/* Is it a name relative to the plugindir? */ +/* Is it a plugin or filter name relative to the plugindir/filterdir? */ static int is_short_name (const char *filename) { @@ -615,7 +657,7 @@ make_random_fifo (void) } static struct backend * -open_plugin_so (const char *name, int short_name) +open_plugin_so (size_t i, const char *name, int short_name) { struct backend *ret; char *filename = (char *) name; @@ -653,7 +695,55 @@ open_plugin_so (const char *name, int short_name) } /* Register the plugin. */ - ret = plugin_register (filename, dl, plugin_init); + ret = plugin_register (i, filename, dl, plugin_init); + + if (free_filename) + free (filename); + + return ret; +} + +static struct backend * +open_filter_so (struct backend *next, size_t i, + const char *name, int short_name) +{ + struct backend *ret; + char *filename = (char *) name; + int free_filename = 0; + void *dl; + struct nbdkit_filter *(*filter_init) (void); + char *error; + + if (short_name) { + /* Short names are rewritten relative to the filterdir. 
*/ + if (asprintf (&filename, + "%s/nbdkit-%s-filter.so", filterdir, name) == -1) { + perror ("asprintf"); + exit (EXIT_FAILURE); + } + free_filename = 1; + } + + dl = dlopen (filename, RTLD_NOW|RTLD_GLOBAL); + if (dl == NULL) { + fprintf (stderr, "%s: %s: %s\n", program_name, filename, dlerror ()); + exit (EXIT_FAILURE); + } + + /* Initialize the filter. See dlopen(3) to understand C weirdness. */ + dlerror (); + *(void **) (&filter_init) = dlsym (dl, "filter_init"); + if ((error = dlerror ()) != NULL) { + fprintf (stderr, "%s: %s: %s\n", program_name, name, error); + exit (EXIT_FAILURE); + } + if (!filter_init) { + fprintf (stderr, "%s: %s: invalid filter_init\n", program_name, name); + exit (EXIT_FAILURE); + } + + /* Register the filter. */ + ret = filter_register (next, i, filename, dl, filter_init); if (free_filename) free (filename); diff --git a/src/plugins.c b/src/plugins.c index 137bae3..1de2ba2 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -102,10 +102,11 @@ plugin_usage (struct backend *b) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); - printf ("%s", p->plugin.name); + printf ("plugin: %s", p->plugin.name); if (p->plugin.longname) printf (" (%s)", p->plugin.longname); printf ("\n"); + printf ("(%s)", p->filename); if (p->plugin.description) { printf ("\n"); printf ("%s\n", p->plugin.description); @@ -518,7 +519,7 @@ static struct backend plugin_functions = { /* Register and load a plugin. 
*/ struct backend * -plugin_register (const char *filename, +plugin_register (size_t index, const char *filename, void *dl, struct nbdkit_plugin *(*plugin_init) (void)) { struct backend_plugin *p; @@ -533,11 +534,13 @@ plugin_register (const char *filename, } p->backend = plugin_functions; + p->backend.next = NULL; + p->backend.i = index; p->filename = strdup (filename); if (p->filename == NULL) goto out_of_memory; p->dl = dl; - debug ("registering %s", p->filename); + debug ("registering plugin %s", p->filename); /* Call the initialization function which returns the address of the * plugin's own 'struct nbdkit_plugin'. @@ -613,7 +616,7 @@ plugin_register (const char *filename, exit (EXIT_FAILURE); } - debug ("registered %s (name %s)", p->filename, p->plugin.name); + debug ("registered plugin %s (name %s)", p->filename, p->plugin.name); /* Call the on-load callback if it exists. */ debug ("%s: load", p->filename); -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 11/13] filters: Add nbdkit-offset-filter.
From: "Richard W.M. Jones" <rjones@redhat.com> This very basic filter allows you to select an offset and range within a plugin, for example: nbdkit --filter=offset file file=foo offset=1M range=100M which serves the byte range [ 1M .. 101M-1 ] from file ‘foo’. Message-Id: <20180117205356.8699-9-rjones@redhat.com> [eblake: adjust for FUA flags] Signed-off-by: Eric Blake <eblake@redhat.com> --- TODO | 2 - configure.ac | 1 + filters/Makefile.am | 3 +- filters/offset/Makefile.am | 62 +++++++++++++ filters/offset/nbdkit-offset-filter.pod | 99 +++++++++++++++++++++ filters/offset/offset.c | 148 ++++++++++++++++++++++++++++++++ 6 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 filters/offset/Makefile.am create mode 100644 filters/offset/nbdkit-offset-filter.pod create mode 100644 filters/offset/offset.c diff --git a/TODO b/TODO index 0955db7..8eda0d7 100644 --- a/TODO +++ b/TODO @@ -44,8 +44,6 @@ Suggestions for filters * copy-on-write, a popular feature in other servers -* export a subset using offset/size - * export a single partition (like qemu-nbd -P) Composing nbdkit diff --git a/configure.ac b/configure.ac index 7032614..4892dc4 100644 --- a/configure.ac +++ b/configure.ac @@ -513,6 +513,7 @@ AC_CONFIG_FILES([Makefile plugins/vddk/Makefile plugins/xz/Makefile filters/Makefile + filters/offset/Makefile src/Makefile src/nbdkit.pc tests/Makefile]) diff --git a/filters/Makefile.am b/filters/Makefile.am index ed1580b..91fbe6c 100644 --- a/filters/Makefile.am +++ b/filters/Makefile.am @@ -30,4 +30,5 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. -#SUBDIRS +SUBDIRS = \ + offset diff --git a/filters/offset/Makefile.am b/filters/offset/Makefile.am new file mode 100644 index 0000000..f6e253c --- /dev/null +++ b/filters/offset/Makefile.am @@ -0,0 +1,62 @@ +# nbdkit +# Copyright (C) 2018 Red Hat Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+ +EXTRA_DIST = nbdkit-offset-filter.pod + +CLEANFILES = *~ + +filterdir = $(libdir)/nbdkit/filters + +filter_LTLIBRARIES = nbdkit-offset-filter.la + +nbdkit_offset_filter_la_SOURCES = \ + offset.c \ + $(top_srcdir)/include/nbdkit-filter.h + +nbdkit_offset_filter_la_CPPFLAGS = \ + -I$(top_srcdir)/include +nbdkit_offset_filter_la_CFLAGS = \ + $(WARNINGS_CFLAGS) +nbdkit_offset_filter_la_LDFLAGS = \ + -module -avoid-version -shared + +if HAVE_POD2MAN + +man_MANS = nbdkit-offset-filter.1 +CLEANFILES += $(man_MANS) + +nbdkit-offset-filter.1: nbdkit-offset-filter.pod + $(POD2MAN) $(POD2MAN_ARGS) --section=1 --name=`basename $@ .1` $< $@.t && \ + if grep 'POD ERROR' $@.t; then rm $@.t; exit 1; fi && \ + mv $@.t $@ + +endif diff --git a/filters/offset/nbdkit-offset-filter.pod b/filters/offset/nbdkit-offset-filter.pod new file mode 100644 index 0000000..c7f7bdf --- /dev/null +++ b/filters/offset/nbdkit-offset-filter.pod @@ -0,0 +1,99 @@ +=encoding utf8 + +=head1 NAME + +nbdkit-offset-filter - nbdkit offset filter + +=head1 SYNOPSIS + + nbdkit --filter=offset plugin offset=OFFSET range=LENGTH [plugin-args...] + +=head1 DESCRIPTION + +C<nbdkit-offset-filter> is a filter that limits requests to the byte +range C<[offset .. offset+range-1]> within another plugin. + +=head1 PARAMETERS + +=over 4 + +=item B<offset=OFFSET> + +The start offset. + +This parameter is required. + +=item B<range=LENGTH> + +The length of data to serve. + +This is optional. If not given then the range is served starting from +the offset through to the end of the underlying file/device. + +=back + +Note it is an error if the range parameter is supplied and +C<offset+range> is larger than the size of data served by the +underlying plugin. 
+ +=head1 EXAMPLE + +Using L<nbdkit-file-plugin(1)>, serve the C<100M> length range +starting from C<1M> through to S<C<101M - 1 byte>> from the file +C<disk.img>: + + nbdkit --filter=offset file file=disk.img offset=1M range=100M + +=head1 SEE ALSO + +L<nbdkit(1)>, +L<nbdkit-file-plugin(1)>, +L<nbdkit-filter(3)>. + +=head1 AUTHORS + +Richard W.M. Jones + +=head1 COPYRIGHT + +Copyright (C) 2018 Red Hat Inc. + +=head1 LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +=over 4 + +=item * + +Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +=item * + +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +=item * + +Neither the name of Red Hat nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +=back + +THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
diff --git a/filters/offset/offset.c b/filters/offset/offset.c new file mode 100644 index 0000000..e781938 --- /dev/null +++ b/filters/offset/offset.c @@ -0,0 +1,148 @@ +/* nbdkit + * Copyright (C) 2018 Red Hat Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include <nbdkit-filter.h> + +static int64_t offset = -1, range = -1; + +/* Called for each key=value passed on the command line. 
*/ +static int +offset_config (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value) +{ + if (strcmp (key, "offset") == 0) { + offset = nbdkit_parse_size (value); + if (offset == -1) + return -1; + return 0; + } + else if (strcmp (key, "range") == 0) { + range = nbdkit_parse_size (value); + if (range == -1) + return -1; + return 0; + } + else + return next (nxdata, key, value); +} + +/* Check the user passed the required offset parameter (range is optional). */ +static int +offset_config_complete (nbdkit_next_config_complete *next, void *nxdata) +{ + if (offset == -1) { + nbdkit_error ("you must supply the offset parameter on the command line"); + return -1; + } + + return next (nxdata); +} + +#define offset_config_help \ + "offset=<OFFSET> (required) The start offset to serve.\n" \ + "range=<LENGTH> The total size to serve." + +/* Get the file size. */ +static int64_t +offset_get_size (struct nbdkit_next *next, void *nxdata, + void *handle) +{ + int64_t real_size = next->get_size (nxdata); + + if (range >= 0) { + if (offset + range > real_size) { + nbdkit_error ("offset + range is larger than the real size of the underlying file or device"); + return -1; + } + return range; + } + else + return real_size - offset; +} + +/* Read data. */ +static int +offset_pread (struct nbdkit_next *next, void *nxdata, + void *handle, void *buf, uint32_t count, uint64_t offs, + uint32_t flags) +{ + return next->pread (nxdata, buf, count, offs + offset, flags); +} + +/* Write data. */ +static int +offset_pwrite (struct nbdkit_next *next, void *nxdata, + void *handle, + const void *buf, uint32_t count, uint64_t offs, uint32_t flags) +{ + return next->pwrite (nxdata, buf, count, offs + offset, flags); +} + +/* Trim data. */ +static int +offset_trim (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offs, uint32_t flags) +{ + return next->trim (nxdata, count, offs + offset, flags); +} + +/* Zero data. 
*/ +static int +offset_zero (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offs, uint32_t flags) +{ + return next->zero (nxdata, count, offs + offset, flags); +} + +static struct nbdkit_filter filter = { + .name = "offset", + .longname = "nbdkit offset filter", + .version = PACKAGE_VERSION, + .config = offset_config, + .config_complete = offset_config_complete, + .config_help = offset_config_help, + .get_size = offset_get_size, + .pread = offset_pread, + .pwrite = offset_pwrite, + .trim = offset_trim, + .zero = offset_zero, +}; + +NBDKIT_REGISTER_FILTER(filter) -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 12/13] filters: Move rdelay/wdelay from file plugin to new delay filter.
From: "Richard W.M. Jones" <rjones@redhat.com> Previously the file plugin supported ‘rdelay’ and ‘wdelay’ parameters for injecting delays (for testing) into read and write requests. This moves the functionality to a new delay filter so that it can be used with any plugin. Message-Id: <20180117205356.8699-10-rjones@redhat.com> [eblake: adjust for FUA flags] Signed-off-by: Eric Blake <eblake@redhat.com> --- TODO | 3 - configure.ac | 1 + filters/Makefile.am | 1 + filters/delay/Makefile.am | 62 +++++++++++++ filters/delay/delay.c | 162 ++++++++++++++++++++++++++++++++++ filters/delay/nbdkit-delay-filter.pod | 88 ++++++++++++++++++ plugins/file/file.c | 76 ++-------------- plugins/file/nbdkit-file-plugin.pod | 14 +-- tests/test-parallel-file.sh | 4 +- tests/test-parallel-nbd.sh | 1 + 10 files changed, 325 insertions(+), 87 deletions(-) create mode 100644 filters/delay/Makefile.am create mode 100644 filters/delay/delay.c create mode 100644 filters/delay/nbdkit-delay-filter.pod diff --git a/TODO b/TODO index 8eda0d7..3ec45fd 100644 --- a/TODO +++ b/TODO @@ -37,9 +37,6 @@ directed to qemu-nbd for these use cases. Suggestions for filters ----------------------- -* adding artificial delays (see wdelay/rdelay options in the file - plugin) - * injecting artificial errors for testing clients * copy-on-write, a popular feature in other servers diff --git a/configure.ac b/configure.ac index 4892dc4..9376a2e 100644 --- a/configure.ac +++ b/configure.ac @@ -513,6 +513,7 @@ AC_CONFIG_FILES([Makefile plugins/vddk/Makefile plugins/xz/Makefile filters/Makefile + filters/delay/Makefile filters/offset/Makefile src/Makefile src/nbdkit.pc diff --git a/filters/Makefile.am b/filters/Makefile.am index 91fbe6c..d4aa6c0 100644 --- a/filters/Makefile.am +++ b/filters/Makefile.am @@ -31,4 +31,5 @@ # SUCH DAMAGE. 
SUBDIRS = \ + delay \ offset diff --git a/filters/delay/Makefile.am b/filters/delay/Makefile.am new file mode 100644 index 0000000..5b20b69 --- /dev/null +++ b/filters/delay/Makefile.am @@ -0,0 +1,62 @@ +# nbdkit +# Copyright (C) 2018 Red Hat Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+ +EXTRA_DIST = nbdkit-delay-filter.pod + +CLEANFILES = *~ + +filterdir = $(libdir)/nbdkit/filters + +filter_LTLIBRARIES = nbdkit-delay-filter.la + +nbdkit_delay_filter_la_SOURCES = \ + delay.c \ + $(top_srcdir)/include/nbdkit-filter.h + +nbdkit_delay_filter_la_CPPFLAGS = \ + -I$(top_srcdir)/include +nbdkit_delay_filter_la_CFLAGS = \ + $(WARNINGS_CFLAGS) +nbdkit_delay_filter_la_LDFLAGS = \ + -module -avoid-version -shared + +if HAVE_POD2MAN + +man_MANS = nbdkit-delay-filter.1 +CLEANFILES += $(man_MANS) + +nbdkit-delay-filter.1: nbdkit-delay-filter.pod + $(POD2MAN) $(POD2MAN_ARGS) --section=1 --name=`basename $@ .1` $< $@.t && \ + if grep 'POD ERROR' $@.t; then rm $@.t; exit 1; fi && \ + mv $@.t $@ + +endif diff --git a/filters/delay/delay.c b/filters/delay/delay.c new file mode 100644 index 0000000..6049a20 --- /dev/null +++ b/filters/delay/delay.c @@ -0,0 +1,162 @@ +/* nbdkit + * Copyright (C) 2018 Red Hat Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <time.h> + +#include <nbdkit-filter.h> + +static int rdelayms = 0; /* read delay (milliseconds) */ +static int wdelayms = 0; /* write delay (milliseconds) */ + +static int +parse_delay (const char *value) +{ + size_t len = strlen (value); + int r; + + if (len > 2 && strcmp (&value[len-2], "ms") == 0) { + if (sscanf (value, "%d", &r) == 1) + return r; + else { + nbdkit_error ("cannot parse rdelay/wdelay milliseconds parameter: %s", + value); + return -1; + } + } + else { + if (sscanf (value, "%d", &r) == 1) + return r * 1000; + else { + nbdkit_error ("cannot parse rdelay/wdelay seconds parameter: %s", + value); + return -1; + } + } +} + +static void +delay (int ms) +{ + if (ms > 0) { + const struct timespec ts = { + .tv_sec = ms / 1000, + .tv_nsec = (ms * 1000000) % 1000000000 + }; + nanosleep (&ts, NULL); + } +} + +static void +read_delay (void) +{ + delay (rdelayms); +} + +static void +write_delay (void) +{ + delay (wdelayms); +} + +/* Called for each key=value passed on the command line. 
*/ +static int +delay_config (nbdkit_next_config *next, void *nxdata, + const char *key, const char *value) +{ + if (strcmp (key, "rdelay") == 0) { + rdelayms = parse_delay (value); + if (rdelayms == -1) + return -1; + return 0; + } + else if (strcmp (key, "wdelay") == 0) { + wdelayms = parse_delay (value); + if (wdelayms == -1) + return -1; + return 0; + } + else + return next (nxdata, key, value); +} + +#define delay_config_help \ + "rdelay=<NN>[ms] Read delay in seconds/milliseconds.\n" \ + "wdelay=<NN>[ms] Write delay in seconds/milliseconds." \ + +/* Read data. */ +static int +delay_pread (struct nbdkit_next *next, void *nxdata, + void *handle, void *buf, uint32_t count, uint64_t offset, + uint32_t flags) +{ + read_delay (); + return next->pread (nxdata, buf, count, offset, flags); +} + +/* Write data. */ +static int +delay_pwrite (struct nbdkit_next *next, void *nxdata, + void *handle, + const void *buf, uint32_t count, uint64_t offset, uint32_t flags) +{ + write_delay (); + return next->pwrite (nxdata, buf, count, offset, flags); +} + +/* Zero data. */ +static int +delay_zero (struct nbdkit_next *next, void *nxdata, + void *handle, uint32_t count, uint64_t offset, uint32_t flags) +{ + write_delay (); + return next->zero (nxdata, count, offset, flags); +} + +static struct nbdkit_filter filter = { + .name = "delay", + .longname = "nbdkit delay filter", + .version = PACKAGE_VERSION, + .config = delay_config, + .config_help = delay_config_help, + .pread = delay_pread, + .pwrite = delay_pwrite, + .zero = delay_zero, +}; + +NBDKIT_REGISTER_FILTER(filter) diff --git a/filters/delay/nbdkit-delay-filter.pod b/filters/delay/nbdkit-delay-filter.pod new file mode 100644 index 0000000..10aba94 --- /dev/null +++ b/filters/delay/nbdkit-delay-filter.pod @@ -0,0 +1,88 @@ +=encoding utf8 + +=head1 NAME + +nbdkit-delay-filter - nbdkit delay filter + +=head1 SYNOPSIS + + nbdkit --filter=delay plugin rdelay=SECS wdelay=SECS [plugin-args...] 
+ + nbdkit --filter=delay plugin rdelay=<NN>ms wdelay=<NN>ms [plugin-args...] + +=head1 DESCRIPTION + +C<nbdkit-delay-filter> is a filter that delays read and write requests +by some seconds or milliseconds. This is used to simulate a slow or +remote server, or to test certain kinds of race conditions in Linux. + +=head1 PARAMETERS + +=over 4 + +=item B<rdelay=SECS> + +=item B<rdelay=E<lt>NNE<gt>ms> + +The optional read delay in seconds or milliseconds. + +=item B<wdelay=SECS> + +=item B<wdelay=E<lt>NNE<gt>ms> + +The optional write delay in seconds or milliseconds. + +=back + +=head1 SEE ALSO + +L<nbdkit(1)>, +L<nbdkit-filter(3)>. + +=head1 AUTHORS + +Richard W.M. Jones + +=head1 COPYRIGHT + +Copyright (C) 2018 Red Hat Inc. + +=head1 LICENSE + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +=over 4 + +=item * + +Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +=item * + +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +=item * + +Neither the name of Red Hat nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +=back + +THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL RED HAT OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/plugins/file/file.c b/plugins/file/file.c index 4a91251..1fe4191 100644 --- a/plugins/file/file.c +++ b/plugins/file/file.c @@ -40,7 +40,6 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> -#include <time.h> #include <errno.h> #include <nbdkit-plugin.h> @@ -50,8 +49,6 @@ #endif static char *filename = NULL; -static int rdelayms = 0; /* read delay (milliseconds) */ -static int wdelayms = 0; /* write delay (milliseconds) */ static void file_unload (void) @@ -59,56 +56,6 @@ file_unload (void) free (filename); } -static int -parse_delay (const char *value) -{ - size_t len = strlen (value); - int r; - - if (len > 2 && strcmp (&value[len-2], "ms") == 0) { - if (sscanf (value, "%d", &r) == 1) - return r; - else { - nbdkit_error ("cannot parse rdelay/wdelay milliseconds parameter: %s", - value); - return -1; - } - } - else { - if (sscanf (value, "%d", &r) == 1) - return r * 1000; - else { - nbdkit_error ("cannot parse rdelay/wdelay seconds parameter: %s", - value); - return -1; - } - } -} - -static void -delay (int ms) -{ - if (ms > 0) { - const struct timespec ts = { - .tv_sec = ms / 1000, - .tv_nsec = (ms * 1000000) % 1000000000 - }; - nanosleep (&ts, NULL); - } -} - -static void -read_delay (void) -{ - delay (rdelayms); -} - -static void -write_delay (void) -{ - delay (wdelayms); -} - /* Called for each key=value passed on the command line. This plugin * only accepts file=<filename>, which is required. 
*/ @@ -122,15 +69,10 @@ file_config (const char *key, const char *value) if (!filename) return -1; } - else if (strcmp (key, "rdelay") == 0) { - rdelayms = parse_delay (value); - if (rdelayms == -1) - return -1; - } - else if (strcmp (key, "wdelay") == 0) { - wdelayms = parse_delay (value); - if (wdelayms == -1) - return -1; + else if (strcmp (key, "rdelay") == 0 || + strcmp (key, "wdelay") == 0) { + nbdkit_error ("add --filter=delay on the command line"); + return -1; } else { nbdkit_error ("unknown parameter '%s'", key); @@ -157,9 +99,7 @@ file_config_complete (void) } #define file_config_help \ - "file=<FILENAME> (required) The filename to serve.\n" \ - "rdelay=<NN>[ms] Read delay in seconds/milliseconds.\n" \ - "wdelay=<NN>[ms] Write delay in seconds/milliseconds." \ + "file=<FILENAME> (required) The filename to serve." \ /* The per-connection handle. */ struct handle { @@ -241,8 +181,6 @@ file_pread (void *handle, void *buf, uint32_t count, uint64_t offset) { struct handle *h = handle; - read_delay (); - while (count > 0) { ssize_t r = pread (h->fd, buf, count, offset); if (r == -1) { @@ -267,8 +205,6 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) { struct handle *h = handle; - write_delay (); - while (count > 0) { ssize_t r = pwrite (h->fd, buf, count, offset); if (r == -1) { @@ -292,8 +228,6 @@ file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim) #endif int r = -1; - write_delay (); - #ifdef FALLOC_FL_PUNCH_HOLE if (may_trim) { r = fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod index a2d1e6a..016513a 100644 --- a/plugins/file/nbdkit-file-plugin.pod +++ b/plugins/file/nbdkit-file-plugin.pod @@ -31,21 +31,13 @@ This parameter is required. =item B<rdelay=E<lt>NNE<gt>ms> -Delay reads for C<SECS> seconds or C<NN> milliseconds. 
-This is used to simulate a slow or remote server, or to -test certain kinds of race conditions in Linux. - -The default is no delay. - =item B<wdelay=SECS> =item B<wdelay=E<lt>NNE<gt>ms> -Delay writes for C<SECS> seconds or C<NN> milliseconds. -This is used to simulate a slow or remote server, or to -test certain kinds of race conditions in Linux. - -The default is no delay. +These plugin parameters have been moved to the +L<nbdkit-delay-filter(1)> filter. Modify the command line to add +I<--filter=delay> in order to use these parameters. =back diff --git a/tests/test-parallel-file.sh b/tests/test-parallel-file.sh index 79a60ac..b9e5f40 100755 --- a/tests/test-parallel-file.sh +++ b/tests/test-parallel-file.sh @@ -49,7 +49,7 @@ $QEMU_IO -f raw -c "aio_write -P 2 1 1" -c "aio_read -P 1 0 1" -c aio_flush \ trap 'rm -f test-parallel-file.out' 0 1 2 3 15 # With --threads=1, the write should complete first because it was issued first -nbdkit -v -t 1 -U - file file=file-data wdelay=2 rdelay=1 --run ' +nbdkit -v -t 1 -U - --filter delay file file=file-data wdelay=2 rdelay=1 --run ' $QEMU_IO -f raw -c "aio_write -P 2 1 1" -c "aio_read -P 1 0 1" -c aio_flush $nbd ' | tee test-parallel-file.out if test "$(grep '1/1' test-parallel-file.out)" != \ @@ -59,7 +59,7 @@ read 1/1 bytes at offset 0"; then fi # With default --threads, the faster read should complete first -nbdkit -v -U - file file=file-data wdelay=2 rdelay=1 --run ' +nbdkit -v -U - --filter delay file file=file-data wdelay=2 rdelay=1 --run ' $QEMU_IO -f raw -c "aio_write -P 2 1 1" -c "aio_read -P 1 0 1" -c aio_flush $nbd ' | tee test-parallel-file.out if test "$(grep '1/1' test-parallel-file.out)" != \ diff --git a/tests/test-parallel-nbd.sh b/tests/test-parallel-nbd.sh index f8e5071..d87573d 100755 --- a/tests/test-parallel-nbd.sh +++ b/tests/test-parallel-nbd.sh @@ -54,6 +54,7 @@ trap 'rm -f test-parallel-nbd.out test-parallel-nbd.sock' 0 1 2 3 15 ( rm -f test-parallel-nbd.sock nbdkit --exit-with-parent -v -U 
test-parallel-nbd.sock \ + --filter delay \ file file=file-data wdelay=2 rdelay=1 & # With --threads=1, the write should complete first because it was issued first -- 2.14.3
Eric Blake
2018-Jan-19 13:40 UTC
[Libguestfs] [nbdkit PATCH v2 13/13] RFC: plugins: Add callbacks for FUA semantics
[still a work in progress, as I finish rebasing to capture the ideas raised on the list, but posting now for initial feedback] The NBD protocol supports Forced Unit Access (FUA) as a more efficient way to wait for just one write to land in persistent storage, rather than all outstanding writes at the time of a flush; modeled after the kernel's block I/O flag of the same name. While we can emulate the proper semantics with a full-blown flush, there are some plugins that can properly pass the FUA flag on to the end storage and thereby avoid some overhead. This patch introduces new callbacks and documentation for those callbacks, although the actual implementation to take advantage of the new callbacks will be in later patches. The biggest thing to note is that we now support 2 API versions for the plugin, where the plugin author chooses whether to keep version 1 (default, no FUA support) or opt in to version 2 (FUA support). Signed-off-by: Eric Blake <eblake@redhat.com> --- docs/nbdkit-plugin.pod | 89 +++++++++++++++++++++++++++++++++++++++++++++++-- docs/nbdkit.pod | 7 +++- include/nbdkit-plugin.h | 31 ++++++++++++++++- src/internal.h | 4 +-- src/plugins.c | 2 +- 5 files changed, 125 insertions(+), 8 deletions(-) diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod index 3cafc42..d982e65 100644 --- a/docs/nbdkit-plugin.pod +++ b/docs/nbdkit-plugin.pod @@ -6,6 +6,8 @@ nbdkit-plugin - How to write nbdkit plugins =head1 SYNOPSIS + #define NBDKIT_API_VERSION 2 + #include <nbdkit-plugin.h> #define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS @@ -51,9 +53,21 @@ L<nbdkit-perl-plugin(3)>, L<nbdkit-python-plugin(3)>, L<nbdkit-ruby-plugin(3)>. +=head1 C<#define NBDKIT_API_VERSION> + +Plugins must choose which API version they want to use. The default +version is 1; but if a plugin defines NBDKIT_API_VERSION to a positive +integer prior to including C<nbdkit-plugin.h>, the signature of +several callbacks is enhanced.
A newer nbdkit will always support +plugins compiled against an older API version, but plugins that opt in +to newer versions require a new enough nbdkit. For now, the maximum +version is 2, which enables fine-tuned response to client flags +including efficient Forced Unit Access (FUA) on writes. + =head1 C<nbdkit-plugin.h> -All plugins should start by including this header file: +All plugins should start by including this header file, after +optionally choosing an API version: #include <nbdkit-plugin.h> @@ -400,7 +414,28 @@ If there is an error, C<.can_trim> should call C<nbdkit_error> with an error message and return C<-1>. This callback is not required. If omitted, then we return true iff a -C<.trim> callback has been defined. +C<.trim> or C<.trim_fua> callback has been defined. + +=head2 C<.can_fua> + + int can_fua (void *handle); + +This is called during the option negotiation phase to find out if the +plugin supports the Forced Unit Access (FUA) flag on write and trim +requests. + +If there is an error, C<.can_fua> should call C<nbdkit_error> with an +error message and return C<-1>. + +This callback is not required. If omitted, then we return true iff +either the C<.pwrite_fua> callback has been defined, or if C<.can_flush> +returns true (in the latter case, FUA semantics are emulated by nbdkit +calling C<.flush> before completing any write or trim operation with +the FUA flag set). + +Note that if this defaults to true and C<.can_trim> also returns true, +the plugin must provide either C<.flush> or C<.trim_fua> for correct +FUA semantics. =head2 C<.pread> @@ -442,6 +477,21 @@ recovered from), C<.pwrite> should call C<nbdkit_error> with an error message, and C<nbdkit_set_error> to record an appropriate error (unless C<errno> is sufficient), then return C<-1>. +If the plugin can provide efficient Forced Unit Access (FUA) semantics, +it should define C<.pwrite_fua> instead. 
+ +=head2 C<.pwrite_fua> + + int pwrite_fua (void *handle, const void *buf, uint32_t count, uint64_t offset, int fua); + +This callback has the same requirements as C<.pwrite>, with the +additional parameter C<fua> set to a non-zero value if the client +wants FUA semantics (where the command must not return until the +actions of the write have landed in persistent storage). If the +plugin cannot provide efficient FUA, but C<.can_flush> returns true +and C<.can_fua> does not return false, then client requests for FUA +semantics are emulated by nbdkit calling C<.flush>. + =head2 C<.flush> int flush (void *handle); @@ -455,6 +505,11 @@ If there is an error, C<.flush> should call C<nbdkit_error> with an error message, and C<nbdkit_set_error> to record an appropriate error (unless C<errno> is sufficient), then return C<-1>. +Note that C<.flush> can be called both by the client doing an explicit +flush request, and by nbdkit when emulating Forced Unit Access (FUA) +semantics after a write or trim where the plugin did not provide FUA +callbacks (C<.pwrite_fua>, C<.zero_fua>, and C<.trim_fua>). + =head2 C<.trim> int trim (void *handle, uint32_t count, uint64_t offset); @@ -467,6 +522,21 @@ If there is an error, C<.trim> should call C<nbdkit_error> with an error message, and C<nbdkit_set_error> to record an appropriate error (unless C<errno> is sufficient), then return C<-1>. +If the plugin can provide efficient Forced Unit Access (FUA) semantics, +it should define C<.trim_fua> instead. + +=head2 C<.trim_fua> + + int trim_fua (void *handle, uint32_t count, uint64_t offset, int fua); + +This callback has the same requirements as C<.trim>, with the +additional parameter C<fua> set to a non-zero value if the client +wants FUA semantics (where the command must not return until the +actions of the trim have landed in persistent storage). 
If the plugin +cannot provide efficient FUA, but C<.can_flush> returns true and +C<.can_fua> does not return false, then client requests for FUA +semantics are emulated by nbdkit calling C<.flush>. + =head2 C<.zero> int zero (void *handle, uint32_t count, uint64_t offset, int may_trim); @@ -488,6 +558,21 @@ If there is an error, C<.zero> should call C<nbdkit_error> with an error message, and C<nbdkit_set_error> to record an appropriate error (unless C<errno> is sufficient), then return C<-1>. +If the plugin can provide efficient Forced Unit Access (FUA) semantics, +it should define C<.zero_fua> instead. + +=head2 C<.zero_fua> + + int zero_fua (void *handle, uint32_t count, uint64_t offset, int may_trim, int fua); + +This callback has the same requirements as C<.zero>, with the +additional parameter C<fua> set to a non-zero value if the client +wants FUA semantics (where the command must not return until the +actions of the write have landed in persistent storage). If the +plugin cannot provide efficient FUA, but C<.can_flush> returns true +and C<.can_fua> does not return false, then client requests for FUA +semantics are emulated by nbdkit calling C<.flush>. + =head1 THREADS Each nbdkit plugin must declare its thread safety model by defining diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod index 636eedc..eaa638b 100644 --- a/docs/nbdkit.pod +++ b/docs/nbdkit.pod @@ -804,7 +804,12 @@ information about that plugin, eg: [etc] Plugins which ship with nbdkit usually have the same version as the -corresponding nbdkit binary. +corresponding nbdkit binary. The nbdkit binary will always be able +to utilize plugins compiled against an older version of the header; +however, there are cases where a newer plugin may not be fully +supported by an older nbdkit binary (for example, a plugin that +supplies C<.pwrite_fua> but not C<.pwrite> may not support writes +when loaded by the older nbdkit). 
=head2 Detect if a plugin is installed diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h index 13541e5..b67d343 100644 --- a/include/nbdkit-plugin.h +++ b/include/nbdkit-plugin.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013-2017 Red Hat Inc. + * Copyright (C) 2013-2018 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,7 +48,16 @@ extern "C" { #define NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS 2 #define NBDKIT_THREAD_MODEL_PARALLEL 3 +#define NBDKIT_FLAG_MAY_TRIM (1<<0) +#define NBDKIT_FLAG_FUA (1<<1) + +/* By default, a plugin gets API version 1; but you may request + * version 2 prior to including this header */ +#ifndef NBDKIT_API_VERSION #define NBDKIT_API_VERSION 1 +#elif (NBDKIT_API_VERSION - 0) < 1 || NBDKIT_API_VERSION > 2) +#error Unsupported API version +#endif struct nbdkit_plugin { /* Do not set these fields directly; use NBDKIT_REGISTER_PLUGIN. @@ -87,15 +96,35 @@ struct nbdkit_plugin { int (*can_trim) (void *handle); int (*pread) (void *handle, void *buf, uint32_t count, uint64_t offset); +#if NBDKIT_API_VERSION == 1 int (*pwrite) (void *handle, const void *buf, uint32_t count, uint64_t offset); +#else + int (*pwrite_old) (void *handle, const void *buf, uint32_t count, uint64_t offset); +#endif int (*flush) (void *handle); +#if NBDKIT_API_VERSION == 1 int (*trim) (void *handle, uint32_t count, uint64_t offset); int (*zero) (void *handle, uint32_t count, uint64_t offset, int may_trim); +#else + int (*trim_old) (void *handle, uint32_t count, uint64_t offset); + int (*zero_old) (void *handle, uint32_t count, uint64_t offset, int may_trim); +#endif int errno_is_preserved; void (*dump_plugin) (void); + int (*can_fua) (void *handle); +#if NBDKIT_API_VERSION == 1 + int (*_unused1) (void *, const void *, uint32_t, uint64_t, uint32_t); + int (*_unused2) (void *, uint32_t, uint64_t, uint32_t); + int (*_unused3) (void *, uint32_t, uint64_t, uint32_t); +#else + int (*pwrite) (void 
*handle, const void *buf, uint32_t count, + uint64_t offset, uint32_t flags); + int (*zero) (void *handle, uint32_t count, uint64_t offset, uint32_t flags); + int (*trim) (void *handle, uint32_t count, uint64_t offset, uint32_t flags); +#endif /* int (*set_exportname) (void *handle, const char *exportname); */ }; diff --git a/src/internal.h b/src/internal.h index 7fd52a2..c76c0d3 100644 --- a/src/internal.h +++ b/src/internal.h @@ -40,6 +40,7 @@ #include <sys/socket.h> #include <pthread.h> +#define NBDKIT_API_VERSION 2 #include "nbdkit-plugin.h" #include "nbdkit-filter.h" @@ -98,9 +99,6 @@ (type *) ((char *) __mptr - offsetof(type, member)); \ }) -#define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */ -#define NBDKIT_FLAG_FUA (1<<1) /* Maps to NBD_CMD_FLAG_FUA */ - /* main.c */ extern const char *exportname; extern const char *ipaddr; diff --git a/src/plugins.c b/src/plugins.c index 1de2ba2..fd5e843 100644 --- a/src/plugins.c +++ b/src/plugins.c @@ -553,7 +553,7 @@ plugin_register (size_t index, const char *filename, } /* Check for incompatible future versions. */ - if (plugin->_api_version != 1) { + if (plugin->_api_version < 0 || plugin->_api_version > 2) { fprintf (stderr, "%s: %s: plugin is incompatible with this version of nbdkit (_api_version = %d)\n", program_name, p->filename, plugin->_api_version); exit (EXIT_FAILURE); -- 2.14.3
Richard W.M. Jones
2018-Jan-19 14:26 UTC
Re: [Libguestfs] [nbdkit PATCH v2 08/13] connections: Allow multiple handles to be stored in the connection object.
I pushed up to and including this patch. I have also made some changes to the "Introduce filters" and "Implement filters" patches and I folded them into a single patch because it was becoming too hard to rebase when they were separate patches, and there's not really any point in having them as separate patches anyway. I need to rebase everything and repost my filters patch series, which I'll do as the very next thing today. One note: tests/test-nbd occasionally fails. (It was already failing very occasionally before, so this isn't a regression.) I didn't look into it in any detail yet. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into KVM guests. http://libguestfs.org/virt-v2v
Richard W.M. Jones
2018-Jan-19 15:27 UTC
Re: [Libguestfs] [nbdkit PATCH v2 13/13] RFC: plugins: Add callbacks for FUA semantics
On Fri, Jan 19, 2018 at 07:40:29AM -0600, Eric Blake wrote: > =head1 SYNOPSIS > > + #define NBDKIT_API_VERSION 2 > + > #include <nbdkit-plugin.h> > > #define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS > @@ -51,9 +53,21 @@ L<nbdkit-perl-plugin(3)>, > L<nbdkit-python-plugin(3)>, > L<nbdkit-ruby-plugin(3)>. > > +=head1 C<#define NBDKIT_API_VERSION> > + > +Plugins must choose which API version they want to use. The default > +version is 1; but if a plugin defines NBDKIT_API_VERSION to a positive > +integer prior to including C<nbdkit-plugin.h>, the signature of > +several callbacks is enhanced. A newer nbdkit will always support > +plugins compiled against an older API version, but plugins that opt in > +to newer versions require a new enough nbdkit. For now, the maximum > +version is 2, which enables fine-tuned response to client flags > +including efficient Forced Unit Access (FUA) on writes. > + > =head1 C<nbdkit-plugin.h> > > -All plugins should start by including this header file: > +All plugins should start by including this header file, after > +optionally choosing an API version: > > #include <nbdkit-plugin.h> I'm in favour of only documenting the newest API. Of course we keep the old one working, but we don't need to document it any longer. This means that each instance of the #include is always preceded by ‘#define NBDKIT_API_VERSION 2’ (or whatever is the latest version). > @@ -442,6 +477,21 @@ recovered from), C<.pwrite> should call C<nbdkit_error> with an error > message, and C<nbdkit_set_error> to record an appropriate error > (unless C<errno> is sufficient), then return C<-1>. > > +If the plugin can provide efficient Forced Unit Access (FUA) semantics, > +it should define C<.pwrite_fua> instead. > + > +=head2 C<.pwrite_fua> > + > + int pwrite_fua (void *handle, const void *buf, uint32_t count, uint64_t offset, int fua); But in fact the callback is still called ‘pwrite’? (which is good!) Rich.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-builder quickly builds VMs from scratch http://libguestfs.org/virt-builder.1.html
Shaun McDowell
2018-Jan-19 16:56 UTC
Re: [Libguestfs] [nbdkit PATCH v2 13/13] RFC: plugins: Add callbacks for FUA semantics
Hi, We've been using a modified nbdkit (ours is cbdkit internally) for about half a year now and since you guys appear to be working on a next version of the API I wanted to go over some of the limitations we hit with nbdkit that we think others may also hit for consideration into the batch of changes you are making to the api. About Us: Our primary use for nbdkit is to facilitate a disk driver implementation in userspace. Our plugin provides a disk interface to cloud/blob/object storages (aws s3, gcs, openstack swift) and so every read and write that doesn't hit an internal cache within the plugin is going out over the network to one of these cloud storages. Because each read or write operation could take somewhere between 1ms (hitting an internal cache) and 30 seconds (hitting a cloud storage bucket that is overloaded causing slowdowns and retries) depending on load, we needed to support as many operations in parallel as possible to overcome potentially high latency and provide high throughput. Limitation: The kernel will (with today's default settings) typically be willing to send up to 128 requests of 128kB size to the driver in parallel. We wanted to support 128 parallel read operations on different areas of the disk without requiring 128 separate threads and connections for the driver. Right now in nbdkit that is impossible. The main loop in connection.c will pull an nbd request off the socket and block until that read request is complete before sending a response and getting the next request, blocking other requests on the socket unless running X connections/threads in parallel. For write operations we can overcome this through flush mechanics and early success calls but with reads we get stuck. Change: We introduced an additional set of functions to the nbdkit_plugin struct that supports asynchronous handling of the requests and a few helper functions for the plugin to use to respond when it has finished the request. 
This is very similar to the fuse filesystem low level api (async supported) vs the high level fuse fs api (sync only). The design goal here is that a single connection/thread on nbdkit can support as many requests in parallel as the plugin allows. The nbdkit side pulls the request off the socket and if the async function pointer is non-null it will wrap the request in an op struct and use the async plugin call for read/write/etc capturing any buffer allocated and some op details into the op pointer. The plugin async_* will start the op and return to nbdkit while the plugin works on it in the background. Nbdkit will then go back to the socket and begin the next request. Our plugin uses 1 connection/nbdkit thread and 2-4 threads internally with boost asio over sockets to service the requests to the cloud. We are able to achieve ~1GB/s (yes bytes) read/write performance to aws s3 from an ec2 node with 10 gigabit networking on < 100MB of memory in the driver with this approach. Here is what some of our function prototypes supporting an asynchronous nbdkit model look like: #define CBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS 2 #define CBDKIT_THREAD_MODEL_PARALLEL 3 #define CBDKIT_THREAD_MODEL_ASYNC 4 struct cbdkit_plugin { ...
int (*pread) (void *handle, void *buf, uint32_t count, uint64_t offset); int (*pwrite) (void *handle, const void *buf, uint32_t count, uint64_t offset); int (*flush) (void *handle); int (*trim) (void *handle, uint32_t count, uint64_t offset); int (*zero) (void *handle, uint32_t count, uint64_t offset, int may_trim); int errno_is_preserved; void (*async_pread) (void *op, void *handle, void *buf, uint32_t count, uint64_t offset); void (*async_pwrite) (void *op, void *handle, const void *buf, uint32_t count, uint64_t offset, int fua); void (*async_flush) (void *op, void *handle); void (*async_trim) (void *op, void *handle, uint32_t count, uint64_t offset, int fua); void (*async_zero) (void *op, void *handle, uint32_t count, uint64_t offset, int may_trim, int fua); ... } Additionally, there are a few helper functions for the plugin to use to respond back to nbdkit when the job is eventually finished. The plugin contract when using the async functions is that every async func guarantees it will call an appropriate async_reply function. /* call for completion of successful async_pwrite, async_flush, async_trim, or async_zero */ extern CBDKIT_CXX_LANG_C int cbdkit_async_reply (void *op); /* call for completion of successful async_pread */ extern CBDKIT_CXX_LANG_C int cbdkit_async_reply_read (void *op); /* call for completion of any async operation with error */ extern CBDKIT_CXX_LANG_C int cbdkit_async_reply_error (void *op, uint32_t error); If there is any interest in supporting async ops in the next API version, I am able to share the entire modified nbdkit (cbdkit) source that we use, which supports this async op framework, FUA, as well as some buffer pooling. 
Thanks for consideration, - Shaun On Fri, Jan 19, 2018 at 8:40 AM, Eric Blake <eblake@redhat.com> wrote:> [still a work in progress, as I finish rebasing to capture the > ideas raised on the list, but posting now for initial feedback] > > The NBD protocol supports Forced Unit Access (FUA) as a more efficient > way to wait for just one write to land in persistent storage, rather > than all outstanding writes at the time of a flush; modeled after > the kernel's block I/O flag of the same name. While we can emulate > the proper semantics with a full-blown flush, there are some plugins > that can properly pass the FUA flag on to the end storage and thereby > avoid some overhead. > > This patch introduces new callbacks and documentations for those > callbacks, although the actual implementation to take advantage of > the new callbacks will be in later patches. The biggest thing to > note is that we now support 2 API versions for the plugin, where > the plugin author chooses whether to keep version 1 (default, no > FUA support) or opt in to version 2 (FUA support). > > Signed-off-by: Eric Blake <eblake@redhat.com> > --- > docs/nbdkit-plugin.pod | 89 ++++++++++++++++++++++++++++++ > +++++++++++++++++-- > docs/nbdkit.pod | 7 +++- > include/nbdkit-plugin.h | 31 ++++++++++++++++- > src/internal.h | 4 +-- > src/plugins.c | 2 +- > 5 files changed, 125 insertions(+), 8 deletions(-) > > diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod > index 3cafc42..d982e65 100644 > --- a/docs/nbdkit-plugin.pod > +++ b/docs/nbdkit-plugin.pod > @@ -6,6 +6,8 @@ nbdkit-plugin - How to write nbdkit plugins > > =head1 SYNOPSIS > > + #define NBDKIT_API_VERSION 2 > + > #include <nbdkit-plugin.h> > > #define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS > @@ -51,9 +53,21 @@ L<nbdkit-perl-plugin(3)>, > L<nbdkit-python-plugin(3)>, > L<nbdkit-ruby-plugin(3)>. > > +=head1 C<#define NBDKIT_API_VERSION> > + > +Plugins must choose which API version they want to use. 
The default > +version is 1; but if a plugin defines NBDKIT_API_VERSION to a positive > +integer prior to including C<nbdkit-plugin.h>, the signature of > +several callbacks is enhanced. A newer nbdkit will always support > +plugins compiled against an older API version, but plugins that opt in > +to newer versions require a new enough nbdkit. For now, the maximum > +version is 2, which enables fine-tuned response to client flags > +including efficient Forced Unit Access (FUA) on writes. > + > =head1 C<nbdkit-plugin.h> > > -All plugins should start by including this header file: > +All plugins should start by including this header file, after > +optionally choosing an API version: > > #include <nbdkit-plugin.h> > > @@ -400,7 +414,28 @@ If there is an error, C<.can_trim> should call > C<nbdkit_error> with an > error message and return C<-1>. > > This callback is not required. If omitted, then we return true iff a > -C<.trim> callback has been defined. > +C<.trim> or C<.trim_fua> callback has been defined. > + > +=head2 C<.can_fua> > + > + int can_fua (void *handle); > + > +This is called during the option negotiation phase to find out if the > +plugin supports the Forced Unit Access (FUA) flag on write and trim > +requests. > + > +If there is an error, C<.can_fua> should call C<nbdkit_error> with an > +error message and return C<-1>. > + > +This callback is not required. If omitted, then we return true iff > +either the C<.pwrite_fua> callback has been defined, or if C<.can_flush> > +returns true (in the latter case, FUA semantics are emulated by nbdkit > +calling C<.flush> before completing any write or trim operation with > +the FUA flag set). > + > +Note that if this defaults to true and C<.can_trim> also returns true, > +the plugin must provide either C<.flush> or C<.trim_fua> for correct > +FUA semantics. 
> > =head2 C<.pread> > > @@ -442,6 +477,21 @@ recovered from), C<.pwrite> should call > C<nbdkit_error> with an error > message, and C<nbdkit_set_error> to record an appropriate error > (unless C<errno> is sufficient), then return C<-1>. > > +If the plugin can provide efficient Forced Unit Access (FUA) semantics, > +it should define C<.pwrite_fua> instead. > + > +=head2 C<.pwrite_fua> > + > + int pwrite_fua (void *handle, const void *buf, uint32_t count, uint64_t > offset, int fua); > + > +This callback has the same requirements as C<.pwrite>, with the > +additional parameter C<fua> set to a non-zero value if the client > +wants FUA semantics (where the command must not return until the > +actions of the write have landed in persistent storage). If the > +plugin cannot provide efficient FUA, but C<.can_flush> returns true > +and C<.can_fua> does not return false, then client requests for FUA > +semantics are emulated by nbdkit calling C<.flush>. > + > =head2 C<.flush> > > int flush (void *handle); > @@ -455,6 +505,11 @@ If there is an error, C<.flush> should call > C<nbdkit_error> with an > error message, and C<nbdkit_set_error> to record an appropriate error > (unless C<errno> is sufficient), then return C<-1>. > > +Note that C<.flush> can be called both by the client doing an explicit > +flush request, and by nbdkit when emulating Forced Unit Access (FUA) > +semantics after a write or trim where the plugin did not provide FUA > +callbacks (C<.pwrite_fua>, C<.zero_fua>, and C<.trim_fua>). > + > =head2 C<.trim> > > int trim (void *handle, uint32_t count, uint64_t offset); > @@ -467,6 +522,21 @@ If there is an error, C<.trim> should call > C<nbdkit_error> with an > error message, and C<nbdkit_set_error> to record an appropriate error > (unless C<errno> is sufficient), then return C<-1>. > > +If the plugin can provide efficient Forced Unit Access (FUA) semantics, > +it should define C<.trim_fua> instead. 
> + > +=head2 C<.trim_fua> > + > + int trim_fua (void *handle, uint32_t count, uint64_t offset, int fua); > + > +This callback has the same requirements as C<.trim>, with the > +additional parameter C<fua> set to a non-zero value if the client > +wants FUA semantics (where the command must not return until the > +actions of the trim have landed in persistent storage). If the plugin > +cannot provide efficient FUA, but C<.can_flush> returns true and > +C<.can_fua> does not return false, then client requests for FUA > +semantics are emulated by nbdkit calling C<.flush>. > + > =head2 C<.zero> > > int zero (void *handle, uint32_t count, uint64_t offset, int may_trim); > @@ -488,6 +558,21 @@ If there is an error, C<.zero> should call > C<nbdkit_error> with an > error message, and C<nbdkit_set_error> to record an appropriate error > (unless C<errno> is sufficient), then return C<-1>. > > +If the plugin can provide efficient Forced Unit Access (FUA) semantics, > +it should define C<.zero_fua> instead. > + > +=head2 C<.zero_fua> > + > + int zero_fua (void *handle, uint32_t count, uint64_t offset, int > may_trim, int fua); > + > +This callback has the same requirements as C<.zero>, with the > +additional parameter C<fua> set to a non-zero value if the client > +wants FUA semantics (where the command must not return until the > +actions of the write have landed in persistent storage). If the > +plugin cannot provide efficient FUA, but C<.can_flush> returns true > +and C<.can_fua> does not return false, then client requests for FUA > +semantics are emulated by nbdkit calling C<.flush>. > + > =head1 THREADS > > Each nbdkit plugin must declare its thread safety model by defining > diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod > index 636eedc..eaa638b 100644 > --- a/docs/nbdkit.pod > +++ b/docs/nbdkit.pod > @@ -804,7 +804,12 @@ information about that plugin, eg: > [etc] > > Plugins which ship with nbdkit usually have the same version as the > -corresponding nbdkit binary. 
> +corresponding nbdkit binary. The nbdkit binary will always be able > +to utilize plugins compiled against an older version of the header; > +however, there are cases where a newer plugin may not be fully > +supported by an older nbdkit binary (for example, a plugin that > +supplies C<.pwrite_fua> but not C<.pwrite> may not support writes > +when loaded by the older nbdkit). > > =head2 Detect if a plugin is installed > > diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h > index 13541e5..b67d343 100644 > --- a/include/nbdkit-plugin.h > +++ b/include/nbdkit-plugin.h > @@ -1,5 +1,5 @@ > /* nbdkit > - * Copyright (C) 2013-2017 Red Hat Inc. > + * Copyright (C) 2013-2018 Red Hat Inc. > * All rights reserved. > * > * Redistribution and use in source and binary forms, with or without > @@ -48,7 +48,16 @@ extern "C" { > #define NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS 2 > #define NBDKIT_THREAD_MODEL_PARALLEL 3 > > +#define NBDKIT_FLAG_MAY_TRIM (1<<0) > +#define NBDKIT_FLAG_FUA (1<<1) > + > +/* By default, a plugin gets API version 1; but you may request > + * version 2 prior to including this header */ > +#ifndef NBDKIT_API_VERSION > #define NBDKIT_API_VERSION 1 > +#elif (NBDKIT_API_VERSION - 0) < 1 || NBDKIT_API_VERSION > 2) > +#error Unsupported API version > +#endif > > struct nbdkit_plugin { > /* Do not set these fields directly; use NBDKIT_REGISTER_PLUGIN. 
> @@ -87,15 +96,35 @@ struct nbdkit_plugin { > int (*can_trim) (void *handle); > > int (*pread) (void *handle, void *buf, uint32_t count, uint64_t offset); > +#if NBDKIT_API_VERSION == 1 > int (*pwrite) (void *handle, const void *buf, uint32_t count, uint64_t > offset); > +#else > + int (*pwrite_old) (void *handle, const void *buf, uint32_t count, > uint64_t offset); > +#endif > int (*flush) (void *handle); > +#if NBDKIT_API_VERSION == 1 > int (*trim) (void *handle, uint32_t count, uint64_t offset); > int (*zero) (void *handle, uint32_t count, uint64_t offset, int > may_trim); > +#else > + int (*trim_old) (void *handle, uint32_t count, uint64_t offset); > + int (*zero_old) (void *handle, uint32_t count, uint64_t offset, int > may_trim); > +#endif > > int errno_is_preserved; > > void (*dump_plugin) (void); > > + int (*can_fua) (void *handle); > +#if NBDKIT_API_VERSION == 1 > + int (*_unused1) (void *, const void *, uint32_t, uint64_t, uint32_t); > + int (*_unused2) (void *, uint32_t, uint64_t, uint32_t); > + int (*_unused3) (void *, uint32_t, uint64_t, uint32_t); > +#else > + int (*pwrite) (void *handle, const void *buf, uint32_t count, > + uint64_t offset, uint32_t flags); > + int (*zero) (void *handle, uint32_t count, uint64_t offset, uint32_t > flags); > + int (*trim) (void *handle, uint32_t count, uint64_t offset, uint32_t > flags); > +#endif > /* int (*set_exportname) (void *handle, const char *exportname); */ > }; > > diff --git a/src/internal.h b/src/internal.h > index 7fd52a2..c76c0d3 100644 > --- a/src/internal.h > +++ b/src/internal.h > @@ -40,6 +40,7 @@ > #include <sys/socket.h> > #include <pthread.h> > > +#define NBDKIT_API_VERSION 2 > #include "nbdkit-plugin.h" > #include "nbdkit-filter.h" > > @@ -98,9 +99,6 @@ > (type *) ((char *) __mptr - offsetof(type, member)); \ > }) > > -#define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */ > -#define NBDKIT_FLAG_FUA (1<<1) /* Maps to NBD_CMD_FLAG_FUA */ > - > /* main.c */ > extern const char 
*exportname; > extern const char *ipaddr; > diff --git a/src/plugins.c b/src/plugins.c > index 1de2ba2..fd5e843 100644 > --- a/src/plugins.c > +++ b/src/plugins.c > @@ -553,7 +553,7 @@ plugin_register (size_t index, const char *filename, > } > > /* Check for incompatible future versions. */ > - if (plugin->_api_version != 1) { > + if (plugin->_api_version < 0 || plugin->_api_version > 2) { > fprintf (stderr, "%s: %s: plugin is incompatible with this version of > nbdkit (_api_version = %d)\n", > program_name, p->filename, plugin->_api_version); > exit (EXIT_FAILURE); > -- > 2.14.3 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs >
Possibly Parallel Threads
- [nbdkit PATCH v2 13/13] RFC: plugins: Add callbacks for FUA semantics
- Re: [nbdkit PATCH v2 13/13] RFC: plugins: Add callbacks for FUA semantics
- [nbdkit PATCH v2 00/13] Add filters + FUA support to nbdkit
- [nbdkit PATCH 0/7] Initial implementation of FUA flag passthrough
- Re: [nbdkit PATCH 0/7] Initial implementation of FUA flag passthrough