Richard W.M. Jones
2016-Jan-11 17:30 UTC
[Libguestfs] [PATCH] Add support for newstyle NBD protocol (RHBZ#1297100).
Experimental and only very lightly tested so far. Rich.
Richard W.M. Jones
2016-Jan-11 17:30 UTC
[Libguestfs] [PATCH] Add support for newstyle NBD protocol (RHBZ#1297100).
--- .gitignore | 1 + TODO | 10 +-- docs/nbdkit.pod | 38 +++++++- src/connections.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/internal.h | 1 + src/main.c | 15 +++- src/protocol.h | 49 ++++++++++- tests/Makefile.am | 8 ++ tests/test-newstyle.c | 102 ++++++++++++++++++++++ 9 files changed, 440 insertions(+), 19 deletions(-) create mode 100644 tests/test-newstyle.c diff --git a/.gitignore b/.gitignore index b1a8850..9ea072f 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ Makefile.in /tests/test-connect /tests/test-file /tests/test-gzip +/tests/test-newstyle /tests/test-ocaml /tests/test-ocaml-plugin.so /tests/test-perl diff --git a/TODO b/TODO index a05aa5b..d39e64c 100644 --- a/TODO +++ b/TODO @@ -17,9 +17,9 @@ * Performance - measure and improve it. -* Implement the new protocol and export names. With export names it - should be possible to have multiple plugins on the command line - (each responding to a different export of course): +* Implement export names. With export names it should be possible to + have multiple plugins on the command line (each responding to a + different export of course): nbdkit --export /foo plugin.so --export /bar another-plugin.so @@ -27,8 +27,8 @@ default that accepts all exportnames, or to divide the export name "space" up using regexps or wildcards. - Annoyingly nbd-client dropped support for the oldstyle protocol (see - https://bugzilla.redhat.com/1297100). + Export names are not actually paths (although that is how they are + often used), but arbitrary UTF-8 text strings. * Implement true parallel request handling. Currently NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS and diff --git a/docs/nbdkit.pod b/docs/nbdkit.pod index 9ce75d3..28ac490 100644 --- a/docs/nbdkit.pod +++ b/docs/nbdkit.pod @@ -7,8 +7,8 @@ nbdkit - A toolkit for creating NBD servers =head1 SYNOPSIS nbdkit [--dump-config] [-f] [-g GROUP] [-i IPADDR] - [-P PIDFILE] [-p PORT] [-r] [--run CMD] [-s] - [-U SOCKET] [-u USER] [-v] [-V] + [--new-style] [-P PIDFILE] [-p PORT] [-r] + [--run CMD] [-s] [-U SOCKET] [-u USER] [-v] [-V] PLUGIN [key=value [key=value [...]]] =head1 DESCRIPTION @@ -103,6 +103,15 @@ See also I<-u>. Listen on the specified interface. The default is to listen on all interfaces. See also I<-p>. +=item B<-n> + +=item B<--new-style> + +=item B<--newstyle> + +Use the newstyle NBD protocol instead of the default (oldstyle) +protocol. See L</NEW STYLE VS OLD STYLE PROTOCOL> below. + =item B<-P> PIDFILE =item B<--pid-file> PIDFILE @@ -280,6 +289,31 @@ Unix socket, like this: nbdkit -U - plugin [args] --run '...' +=head1 NEW STYLE VS OLD STYLE PROTOCOL + +The NBD protocol comes in two incompatible forms that we call +"oldstyle" and "newstyle". + +Unfortunately which protocol you should use depends on the client and +cannot be known in advance (despite claims to the contrary). + +nbdkit defaults to the oldstyle protocol for compatibility with qemu +and qemu-nbd. Use the I<-n> or I<--new-style> flag on the command +line to use the newstyle protocol. + +If you want to claim compatibility with what the NBD proto.txt +document says should be the case (which isn't in reality), then you +should always use I<-n> when using port 10809, and use oldstyle +otherwise. + +If the client is nbd-client E<ge> 3.10 then you must use the newstyle +protocol (add the I<-n> flag on the command line), else you will see +this error: + + Error: It looks like you're trying to connect to an oldstyle server. + +nbdkit ignores export names at present (see also the C<TODO> file). + =head1 SIGNALS C<nbdkit> responds to the following signals: diff --git a/src/connections.c b/src/connections.c index 15f416b..6bdf4ef 100644 --- a/src/connections.c +++ b/src/connections.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013 Red Hat Inc. + * Copyright (C) 2013-2016 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -52,6 +52,12 @@ /* Maximum read or write request that we will handle. */ #define MAX_REQUEST_SIZE (64 * 1024 * 1024) +/* Maximum number of client options we allow before giving up. */ +#define MAX_NR_OPTIONS 32 + +/* Maximum length of any option data (bytes). */ +#define MAX_OPTION_LENGTH 4096 + static struct connection *new_connection (int sockin, int sockout); static void free_connection (struct connection *conn); static int negotiate_handshake (struct connection *conn); @@ -143,11 +149,8 @@ free_connection (struct connection *conn) free (conn); } -/* XXX Note because we don't support multiple plugins or export names, - * we are using the old-style handshake. This will be fixed. - */ static int -_negotiate_handshake (struct connection *conn) +_negotiate_handshake_oldstyle (struct connection *conn) { struct old_handshake handshake; int64_t r; @@ -201,7 +204,8 @@ _negotiate_handshake (struct connection *conn) conn->can_trim = 1; } - debug ("flags: global 0x%x export 0x%x", gflags, eflags); + debug ("oldstyle negotiation: flags: global 0x%x export 0x%x", + gflags, eflags); memset (&handshake, 0, sizeof handshake); memcpy (handshake.nbdmagic, "NBDMAGIC", 8); @@ -218,13 +222,230 @@ _negotiate_handshake (struct connection *conn) return 0; } +/* Receive newstyle options. + * + * Currently we ignore NBD_OPT_EXPORT_NAME (see TODO), we close the + * connection if sent NBD_OPT_ABORT, we send a canned list of + * options for NBD_OPT_LIST, and we send NBD_REP_ERR_UNSUP for + * everything else. + */ +static int +send_newstyle_option_reply (struct connection *conn, + uint32_t option, uint32_t reply) +{ + struct fixed_new_option_reply fixed_new_option_reply; + + fixed_new_option_reply.magic = htobe64 (NEW_OPTION_REPLY); + fixed_new_option_reply.option = htobe32 (option); + fixed_new_option_reply.reply = htobe32 (reply); + fixed_new_option_reply.replylen = htobe32 (0); + + if (xwrite (conn->sockout, + &fixed_new_option_reply, sizeof fixed_new_option_reply) == -1) { + nbdkit_error ("write: %m"); + return -1; + } + + return 0; +} + +static int +_negotiate_handshake_newstyle_options (struct connection *conn) +{ + struct new_option new_option; + size_t nr_options; + uint64_t version; + uint32_t option; + uint32_t optlen; + char data[MAX_OPTION_LENGTH+1]; + + for (nr_options = 0; nr_options < MAX_NR_OPTIONS; ++nr_options) { + if (xread (conn->sockin, &new_option, sizeof new_option) == -1) { + nbdkit_error ("read: %m"); + return -1; + } + + version = be64toh (new_option.version); + if (version != NEW_VERSION) { + nbdkit_error ("unknown option version %" PRIx64 + ", expecting %" PRIx64, + version, NEW_VERSION); + return -1; + } + + /* There is a maximum option length we will accept, regardless + * of the option type. + */ + optlen = be32toh (new_option.optlen); + if (optlen > MAX_OPTION_LENGTH) { + nbdkit_error ("client option data too long (%" PRIu32 ")", optlen); + return -1; + } + + option = be32toh (new_option.option); + switch (option) { + case NBD_OPT_EXPORT_NAME: + if (xread (conn->sockin, data, optlen) == -1) { + nbdkit_error ("read: %m"); + return -1; + } + /* Apart from printing it, ignore the export name. */ + data[optlen] = '\0'; + debug ("newstyle negotiation: client requested export '%s' (ignored)", + data); + break; + + case NBD_OPT_ABORT: + if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1) + return -1; + nbdkit_error ("client sent NBD_OPT_ABORT to abort the connection"); + return -1; + + case NBD_OPT_LIST: + if (optlen != 0) { + if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_INVALID) + == -1) + return -1; + continue; + } + + /* Since we don't support export names, there is nothing to list. */ + if (send_newstyle_option_reply (conn, option, NBD_REP_ACK) == -1) + return -1; + break; + + default: + /* Unknown option. */ + if (send_newstyle_option_reply (conn, option, NBD_REP_ERR_UNSUP) == -1) + return -1; + } + + /* Note, since it's not very clear from the protocol doc, that the + * client must send NBD_OPT_EXPORT_NAME last, and that ends option + * negotiation. + */ + if (option == NBD_OPT_EXPORT_NAME) + break; + } + + if (nr_options >= MAX_NR_OPTIONS) { + nbdkit_error ("client exceeded maximum number of options (%d)", + MAX_NR_OPTIONS); + return -1; + } + + return 0; +} + +static int +_negotiate_handshake_newstyle (struct connection *conn) +{ + struct new_handshake handshake; + uint16_t gflags; + uint32_t cflags; + struct new_handshake_finish handshake_finish; + int64_t r; + uint64_t exportsize; + uint16_t eflags; + int fl; + + gflags = NBD_FLAG_FIXED_NEWSTYLE; + + debug ("newstyle negotiation: flags: global 0x%x", gflags); + + memcpy (handshake.nbdmagic, "NBDMAGIC", 8); + handshake.version = htobe64 (NEW_VERSION); + handshake.gflags = htobe16 (gflags); + + if (xwrite (conn->sockout, &handshake, sizeof handshake) == -1) { + nbdkit_error ("write: %m"); + return -1; + } + + /* Client now sends us its 32 bit flags word ... */ + if (xread (conn->sockin, &cflags, sizeof cflags) == -1) { + nbdkit_error ("read: %m"); + return -1; + } + cflags = be32toh (cflags); + /* ... which other than printing out, we ignore. */ + debug ("newstyle negotiation: client flags: 0x%x", cflags); + + /* Receive newstyle options. */ + if (_negotiate_handshake_newstyle_options (conn) == -1) + return -1; + + /* Finish the newstyle handshake. */ + r = plugin_get_size (conn); + if (r == -1) + return -1; + if (r < 0) { + nbdkit_error (".get_size function returned invalid value " + "(%" PRIi64 ")", r); + return -1; + } + exportsize = (uint64_t) r; + conn->exportsize = exportsize; + + eflags = NBD_FLAG_HAS_FLAGS; + + fl = plugin_can_write (conn); + if (fl == -1) + return -1; + if (readonly || !fl) { + eflags |= NBD_FLAG_READ_ONLY; + conn->readonly = 1; + } + + fl = plugin_can_flush (conn); + if (fl == -1) + return -1; + if (fl) { + eflags |= NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA; + conn->can_flush = 1; + } + + fl = plugin_is_rotational (conn); + if (fl == -1) + return -1; + if (fl) { + eflags |= NBD_FLAG_ROTATIONAL; + conn->is_rotational = 1; + } + + fl = plugin_can_trim (conn); + if (fl == -1) + return -1; + if (fl) { + eflags |= NBD_FLAG_SEND_TRIM; + conn->can_trim = 1; + } + + debug ("newstyle negotiation: flags: export 0x%x", eflags); + + memset (&handshake_finish, 0, sizeof handshake_finish); + handshake_finish.exportsize = htobe64 (exportsize); + handshake_finish.eflags = htobe16 (eflags); + + if (xwrite (conn->sockout, + &handshake_finish, sizeof handshake_finish) == -1) { + nbdkit_error ("write: %m"); + return -1; + } + + return 0; +} + static int negotiate_handshake (struct connection *conn) { int r; plugin_lock_request (conn); - r = _negotiate_handshake (conn); + if (!newstyle) + r = _negotiate_handshake_oldstyle (conn); + else + r = _negotiate_handshake_newstyle (conn); plugin_unlock_request (conn); return r; diff --git a/src/internal.h b/src/internal.h index c834b14..0603779 100644 --- a/src/internal.h +++ b/src/internal.h @@ -49,6 +49,7 @@ /* main.c */ extern const char *ipaddr; +extern int newstyle; extern const char *port; extern int readonly; extern char *unixsocket; diff --git a/src/main.c b/src/main.c index 1248a8e..21b0595 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013-2014 Red Hat Inc. + * Copyright (C) 2013-2016 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -67,6 +67,7 @@ static gid_t parsegroup (const char *); int foreground; /* -f */ const char *ipaddr; /* -i */ +int newstyle; /* -n */ int listen_stdin; /* -s */ char *pidfile; /* -P */ const char *port; /* -p */ @@ -83,7 +84,7 @@ static char *random_fifo = NULL; enum { HELP_OPTION = CHAR_MAX + 1 }; -static const char *short_options = "fg:i:p:P:rsu:U:vV"; +static const char *short_options = "fg:i:np:P:rsu:U:vV"; static const struct option long_options[] = { { "help", 0, NULL, HELP_OPTION }, { "dump-config",0, NULL, 0 }, @@ -92,6 +93,8 @@ static const struct option long_options[] = { { "group", 1, NULL, 'g' }, { "ip-addr", 1, NULL, 'i' }, { "ipaddr", 1, NULL, 'i' }, + { "new-style", 1, NULL, 'n' }, + { "newstyle", 1, NULL, 'n' }, { "pid-file", 1, NULL, 'P' }, { "pidfile", 1, NULL, 'P' }, { "port", 1, NULL, 'p' }, @@ -111,8 +114,8 @@ static void usage (void) { printf ("nbdkit [--dump-config] [-f] [-g GROUP] [-i IPADDR]\n" - " [-P PIDFILE] [-p PORT] [-r] [--run CMD] [-s]\n" - " [-U SOCKET] [-u USER] [-v] [-V]\n" + " [--new-style] [-P PIDFILE] [-p PORT] [-r]\n" + " [--run CMD] [-s] [-U SOCKET] [-u USER] [-v] [-V]\n" " PLUGIN [key=value [key=value [...]]]\n" "\n" "Please read the nbdkit(1) manual page for full usage.\n"); @@ -180,6 +183,10 @@ main (int argc, char *argv[]) ipaddr = optarg; break; + case 'n': + newstyle = 1; + break; + case 'P': pidfile = nbdkit_absolute_path (optarg); if (pidfile == NULL) diff --git a/src/protocol.h b/src/protocol.h index 4449171..2f9a341 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -36,7 +36,7 @@ #include <stdint.h> -/* Old-style handshake */ +/* Old-style handshake. */ struct old_handshake { char nbdmagic[8]; /* "NBDMAGIC" */ uint64_t version; /* OLD_VERSION, in network byte order */ @@ -48,6 +48,41 @@ struct old_handshake { #define OLD_VERSION UINT64_C(0x420281861253) +/* New-style handshake. */ +struct new_handshake { + char nbdmagic[8]; /* "NBDMAGIC" */ + uint64_t version; /* NEW_VERSION, in network byte order */ + uint16_t gflags; /* global flags, in network byte order */ +} __attribute__((packed)); + +#define NEW_VERSION UINT64_C(0x49484156454F5054) + +/* New-style handshake option (sent by the client to us). */ +struct new_option { + uint64_t version; /* NEW_VERSION, in network byte order */ + uint32_t option; /* NBD_OPT_* */ + uint32_t optlen; /* option data length */ + /* option data follows */ +} __attribute__((packed)); + +/* Fixed newstyle handshake reply message. */ +struct fixed_new_option_reply { + uint64_t magic; /* NEW_OPTION_REPLY, network byte order */ + uint32_t option; /* option we are replying to */ + uint32_t reply; /* NBD_REP_* */ + uint32_t replylen; /* we always send zero at the moment */ + /* reply data follows, but we currently never send any */ +}; + +#define NEW_OPTION_REPLY UINT64_C(0x3e889045565a9) + +/* New-style handshake server reply. */ +struct new_handshake_finish { + uint64_t exportsize; /* in network byte order */ + uint16_t eflags; /* per-export flags, in network byte order */ + char zeroes[124]; /* must be sent as zero bytes */ +} __attribute__((packed)); + /* Global flags. */ #define NBD_FLAG_FIXED_NEWSTYLE 1 @@ -59,6 +94,18 @@ struct old_handshake { #define NBD_FLAG_ROTATIONAL 16 #define NBD_FLAG_SEND_TRIM 32 +/* NBD options (new style handshake only). */ +#define NBD_OPT_EXPORT_NAME 1 +#define NBD_OPT_ABORT 2 +#define NBD_OPT_LIST 3 + +#define NBD_REP_ACK 1 +#define NBD_REP_SERVER 2 +#define NBD_REP_ERR_UNSUP 0x80000001 +#define NBD_REP_ERR_POLICY 0x80000002 +#define NBD_REP_ERR_INVALID 0x80000003 +#define NBD_REP_ERR_PLATFORM 0x80000004 + /* Request (client -> server). */ struct request { uint32_t magic; /* NBD_REQUEST_MAGIC. */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 511c39c..8d27032 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -103,6 +103,14 @@ file-data: for f in `seq 1 512`; do echo -ne '\x01\x02\x03\x04\x05\x06\x07\x08'; done > $@-t mv $@-t $@ +# newstyle protocol test. +check_PROGRAMS += test-newstyle +TESTS += test-newstyle + +test_newstyle_SOURCES = test-newstyle.c test.h +test_newstyle_CFLAGS = $(WARNINGS_CFLAGS) $(LIBGUESTFS_CFLAGS) +test_newstyle_LDADD = libtest.la $(LIBGUESTFS_LIBS) + # gzip plugin test. if HAVE_ZLIB if HAVE_GUESTFISH diff --git a/tests/test-newstyle.c b/tests/test-newstyle.c new file mode 100644 index 0000000..b1d8ce7 --- /dev/null +++ b/tests/test-newstyle.c @@ -0,0 +1,102 @@ +/* nbdkit + * Copyright (C) 2013-2016 Red Hat Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> + +#include <guestfs.h> + +#include "test.h" + +int +main (int argc, char *argv[]) +{ + guestfs_h *g; + int r; + char *data; + size_t i, size; + + if (test_start_nbdkit ("-n", NBDKIT_PLUGIN ("file"), "file=file-data", + NULL) == -1) + exit (EXIT_FAILURE); + + g = guestfs_create (); + if (g == NULL) { + perror ("guestfs_create"); + exit (EXIT_FAILURE); + } + + /* Using any exportname causes qemu to use the newstyle protocol. */ + r = guestfs_add_drive_opts (g, "/" /* exportname */, + GUESTFS_ADD_DRIVE_OPTS_FORMAT, "raw", + GUESTFS_ADD_DRIVE_OPTS_PROTOCOL, "nbd", + GUESTFS_ADD_DRIVE_OPTS_SERVER, server, + -1); + if (r == -1) + exit (EXIT_FAILURE); + + if (guestfs_launch (g) == -1) + exit (EXIT_FAILURE); + + /* Check the data in the file is \x01-\x08 repeated 512 times. */ + data = guestfs_pread_device (g, "/dev/sda", 8 * 512, 0, &size); + if (!data) + exit (EXIT_FAILURE); + if (size != 8 * 512) { + fprintf (stderr, "%s FAILED: unexpected size (actual: %zu, expected: 512)\n", + program_name, size); + exit (EXIT_FAILURE); + } + + for (i = 0; i < 512 * 8; i += 8) { + if (data[i] != 1 || data[i+1] != 2 || + data[i+2] != 3 || data[i+3] != 4 || + data[i+4] != 5 || data[i+5] != 6 || + data[i+6] != 7 || data[i+7] != 8) { + fprintf (stderr, "%s FAILED: unexpected data returned at offset %zu\n", + program_name, i); + exit (EXIT_FAILURE); + } + } + + free (data); + + guestfs_close (g); + exit (EXIT_SUCCESS); +} -- 2.5.0
Apparently Analagous Threads
- [PATCH nbdkit 0/2] server: Split out NBD protocol code from connections code.
- [PATCH nbdkit 0/4] common/protocol: Unify public <nbd-protocol.h>
- [PATCH nbdkit v2] protocol: Implement NBD_OPT_GO.
- [PATCH nbdkit] protocol: Implement NBD_OPT_GO.
- [nbdkit PATCH 0/2] more protocol.h tweaks