Richard W.M. Jones
2019-Oct-18  15:39 UTC
[Libguestfs] [PATCH libnbd 0/2] api: Add support for AF_VSOCK.
This is a series of patches to libnbd and nbdkit adding AF_VSOCK
support.

On the host side it allows you to start an nbdkit instance which
listens on a virtio-vsock socket:

  $ ./nbdkit -fv --vsock memory 1G
  ...
  nbdkit: debug: bound to vsock 2:10809

On the guest side you can then use libnbd to connect to the server:

  $ ./run nbdsh -c 'h.connect_vsock(2, 10809)' -c 'print(h.get_size())'
  1073741824
  $ ./run nbdfuse mp --vsock 2 10809 &
  $ ll mp/
  total 0
  -rw-rw-rw-. 1 rjones rjones 1073741824 Oct 18 16:23 nbd
  $ dd if=/dev/random of=mp/nbd bs=1024 count=100 conv=notrunc,nocreat
  dd: warning: partial read (84 bytes); suggest iflag=fullblock
  0+100 records in
  0+100 records out
  6851 bytes (6.9 kB, 6.7 KiB) copied, 0.013797 s, 497 kB/s

(Performance of FUSE is not great, it should be better using raw
libnbd.)

I mainly wrote this to show that it can be done.  It's unclear if this
would be faster or slower than the usual way that NBD devices are
exposed to guests via virtio-blk/-scsi.

https://wiki.qemu.org/Features/VirtioVsock

Thanks: Stefan Hajnoczi for help with debugging this.
Richard W.M. Jones
2019-Oct-18  15:39 UTC
[Libguestfs] [PATCH libnbd 1/2] states: Don't assume socket address family is always AF_UNIX.
Get the address family from h->connaddr instead.
This should make no difference to existing code.
---
 generator/states-connect.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/generator/states-connect.c b/generator/states-connect.c
index 04e894c..d62b0f5 100644
--- a/generator/states-connect.c
+++ b/generator/states-connect.c
@@ -51,7 +51,8 @@ STATE_MACHINE {
   int fd;
 
   assert (!h->sock);
-  fd = socket (AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
+  fd = socket (h->connaddr.ss_family,
+               SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
   if (fd == -1) {
     SET_NEXT_STATE (%.DEAD);
     set_error (errno, "socket");
-- 
2.23.0
Richard W.M. Jones
2019-Oct-18  15:39 UTC
[Libguestfs] [PATCH libnbd 2/2] api: Add support for AF_VSOCK.
This adds a new API for connecting to AF_VSOCK protocol
(https://wiki.qemu.org/Features/VirtioVsock).
For example:
  nbd_connect_vsock (nbd, 2, 10809);
There is no test of this feature because it only works between guest
and host.  You cannot start a server and client on the host and talk
between them, which is what we'd need to write a sane test.
---
 configure.ac               |  2 ++
 fuse/nbdfuse.c             | 26 +++++++++++++++++++
 fuse/nbdfuse.pod           |  8 ++++++
 generator/generator        | 51 ++++++++++++++++++++++++++++++++++++--
 generator/states-connect.c | 22 ++++++++++++++++
 lib/connect.c              | 20 +++++++++++++++
 lib/internal.h             |  3 +++
 7 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index a84580d..95ffca7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -77,6 +77,8 @@ AC_CHECK_HEADERS([\
     stdatomic.h \
     sys/endian.h])
 
+AC_CHECK_HEADERS([linux/vm_sockets.h], [], [], [#include <sys/socket.h>])
+
 dnl Check for functions, all optional.
 AC_CHECK_FUNCS([\
     execvpe])
diff --git a/fuse/nbdfuse.c b/fuse/nbdfuse.c
index 75bb0be..c0aee79 100644
--- a/fuse/nbdfuse.c
+++ b/fuse/nbdfuse.c
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <inttypes.h>
 #include <string.h>
 #include <getopt.h>
 #include <limits.h>
@@ -97,6 +98,7 @@ usage (FILE *fp, int exitcode)
 "    nbdfuse MOUNTPOINT[/FILENAME] --fd N\n"
 "    nbdfuse MOUNTPOINT[/FILENAME] --tcp HOST PORT\n"
 "    nbdfuse MOUNTPOINT[/FILENAME] --unix SOCKET\n"
+"    nbdfuse MOUNTPOINT[/FILENAME] --vsock CID PORT\n"
 "\n"
 "Please read the nbdfuse(1) manual page for full usage.\n"
 "\n"
@@ -139,6 +141,7 @@ main (int argc, char *argv[])
     MODE_SOCKET_ACTIVATION,
     MODE_TCP,
     MODE_UNIX,
+    MODE_VSOCK,
   } mode = MODE_URI;
   enum {
     HELP_OPTION = CHAR_MAX + 1,
@@ -161,6 +164,7 @@ main (int argc, char *argv[])
     { NULL }
   };
   int c, fd, r;
+  uint32_t cid, port;
   int64_t ssize;
   const char *s;
   struct fuse_args fuse_args = FUSE_ARGS_INIT (0, NULL);
@@ -262,6 +266,10 @@ main (int argc, char *argv[])
     mode = MODE_UNIX;
     optind++;
   }
+  else if (strcmp (argv[optind], "--vsock") == 0) {
+    mode = MODE_VSOCK;
+    optind++;
+  }
   /* This is undocumented, but allow either URI or --uri URI. */
   else if (strcmp (argv[optind], "--uri") == 0) {
     mode = MODE_URI;
@@ -289,6 +297,7 @@ main (int argc, char *argv[])
       usage (stderr, EXIT_FAILURE);
     break;
   case MODE_TCP:
+  case MODE_VSOCK:
     if (argc - optind != 2)
       usage (stderr, EXIT_FAILURE);
     break;
@@ -357,6 +366,23 @@ main (int argc, char *argv[])
       exit (EXIT_FAILURE);
     }
     break;
+
+  case MODE_VSOCK:
+    if (sscanf (argv[optind], "%" SCNu32, &cid) != 1) {
+      fprintf (stderr, "%s: could not parse vsock cid: %s\n\n",
+               argv[0], argv[optind]);
+      exit (EXIT_FAILURE);
+    }
+    if (sscanf (argv[optind+1], "%" SCNu32, &port) != 1) {
+      fprintf (stderr, "%s: could not parse vsock port: %s\n\n",
+               argv[0], argv[optind]);
+      exit (EXIT_FAILURE);
+    }
+    if (nbd_connect_vsock (nbd, cid, port) == -1) {
+      fprintf (stderr, "%s\n", nbd_get_error ());
+      exit (EXIT_FAILURE);
+    }
+    break;
   }
 
   ssize = nbd_get_size (nbd);
diff --git a/fuse/nbdfuse.pod b/fuse/nbdfuse.pod
index 3187822..3a7401c 100644
--- a/fuse/nbdfuse.pod
+++ b/fuse/nbdfuse.pod
@@ -19,6 +19,8 @@ Other modes:
 
  nbdfuse MOUNTPOINT[/FILENAME] --unix SOCKET
 
+ nbdfuse MOUNTPOINT[/FILENAME] --vsock CID PORT
+
 =head1 DESCRIPTION
 
 nbdfuse presents a Network Block Device as a local file inside a FUSE
@@ -230,6 +232,11 @@ unencrypted TCP socket.  See also L<nbd_connect_tcp(3)>.
 Select Unix mode.  Connect to an NBD server on a Unix domain socket.
 See also L<nbd_connect_unix(3)>.
 
+=item B<--vsock> CID PORT
+
+Select vsock mode.  Connect to an NBD server on a C<AF_VSOCK> socket.
+See also L<nbd_connect_vsock(3)>.
+
 =back
 
 =head1 NOTES
@@ -294,6 +301,7 @@ L<nbd_connect_socket(3)>,
 L<nbd_connect_systemd_socket_activation(3)>,
 L<nbd_connect_tcp(3)>,
 L<nbd_connect_unix(3)>,
+L<nbd_connect_vsock(3)>,
 L<libguestfs(3)>,
 L<guestfish(1)>,
 L<guestmount(1)>,
diff --git a/generator/generator b/generator/generator
index 54a8eb7..89dd52a 100755
--- a/generator/generator
+++ b/generator/generator
@@ -93,6 +93,7 @@ type external_event =
   | CmdCreate                   (* [nbd_create] function called *)
   | CmdConnectSockAddr          (* [nbd_aio_connect] function called *)
   | CmdConnectUnix              (* [nbd_aio_connect_unix] *)
+  | CmdConnectVSock             (* [nbd_aio_connect_vsock] *)
   | CmdConnectTCP               (* [nbd_aio_connect_tcp] *)
   | CmdConnectCommand           (* [nbd_aio_connect_command] *)
   | CmdConnectSA                (* [nbd_aio_connect_systemd_socket_activation]*)
@@ -169,6 +170,7 @@ let rec state_machine = [
     external_events = [ CmdCreate, "";
                         CmdConnectSockAddr, "CONNECT.START";
                         CmdConnectUnix, "CONNECT_UNIX.START";
+                        CmdConnectVSock, "CONNECT_VSOCK.START";
                         CmdConnectTCP, "CONNECT_TCP.START";
                         CmdConnectCommand, "CONNECT_COMMAND.START";
                         CmdConnectSA, "CONNECT_SA.START";
@@ -177,6 +179,7 @@ let rec state_machine = [
 
   Group ("CONNECT", connect_state_machine);
   Group ("CONNECT_UNIX", connect_unix_state_machine);
+  Group ("CONNECT_VSOCK", connect_vsock_state_machine);
   Group ("CONNECT_TCP", connect_tcp_state_machine);
   Group ("CONNECT_COMMAND", connect_command_state_machine);
   Group ("CONNECT_SA", connect_sa_state_machine);
@@ -236,6 +239,16 @@ and connect_unix_state_machine = [
   };
 ]
 
+(* State machine implementing [nbd_aio_connect_vsock]. *)
+and connect_vsock_state_machine = [
+  State {
+    default_state with
+    name = "START";
+    comment = "Connect to an AF_VSOCK socket";
+    external_events = [];
+  };
+]
+
 (* State machine implementing [nbd_aio_connect_tcp]. *)
 and connect_tcp_state_machine = [
   State {
@@ -1491,6 +1504,20 @@ when the connection has been made.";
     example = Some "examples/fetch-first-sector.c";
   };
 
+  "connect_vsock", {
+    default_call with
+    args = [ UInt32 "cid"; UInt32 "port" ]; ret = RErr;
+    permitted_states = [ Created ];
+    shortdesc = "connect to NBD server over AF_VSOCK protocol";
+    longdesc = "\
+Connect (synchronously) over the C<AF_VSOCK> protocol from a
+virtual machine to an NBD server, usually running on the host.  The
+C<cid> and C<port> parameters specify the server address.  Usually
+C<cid> should be C<2> (to connect to the host), and C<port> might be
+C<10809> or another port number assigned to you by the host
+administrator.  This call returns when the connection has been made.";
+  };
+
   "connect_tcp", {
     default_call with
     args = [ String "hostname"; String "port" ]; ret = RErr;
@@ -2148,6 +2175,22 @@ on the connection.";
     example = Some "examples/aio-connect-read.c";
   };
 
+  "aio_connect_vsock", {
+    default_call with
+    args = [ UInt32 "cid"; UInt32 "port" ]; ret = RErr;
+    permitted_states = [ Created ];
+    shortdesc = "connect to the NBD server over AF_VSOCK socket";
+    longdesc = "\
+Begin connecting to the NBD server over the C<AF_VSOCK>
+protocol to the server C<cid:port>.  Parameters behave as documented in
+L<nbd_connect_vsock(3)>.
+
+You can check if the connection is still connecting by calling
+L<nbd_aio_is_connecting(3)>, or if it has connected to the server
+and completed the NBD handshake by calling L<nbd_aio_is_ready(3)>,
+on the connection.";
+  };
+
   "aio_connect_tcp", {
     default_call with
     args = [ String "hostname"; String "port" ]; ret = RErr;
@@ -2793,6 +2836,8 @@ let first_version = [
   "aio_connect_systemd_socket_activation", (1, 2);
   "connect_socket", (1, 2);
   "aio_connect_socket", (1, 2);
+  "connect_vsock", (1, 2);
+  "aio_connect_vsock", (1, 2);
 
   (* These calls are proposed for a future version of libnbd, but
    * have not been added to any released version so far.
@@ -3151,7 +3196,7 @@ end = struct
 let all_external_events =
   [NotifyRead; NotifyWrite;
    CmdCreate;
-   CmdConnectSockAddr; CmdConnectUnix; CmdConnectTCP;
+   CmdConnectSockAddr; CmdConnectUnix; CmdConnectVSock; CmdConnectTCP;
    CmdConnectCommand; CmdConnectSA; CmdConnectSocket;
    CmdIssue]
 
@@ -3161,6 +3206,7 @@ let string_of_external_event = function
   | CmdCreate -> "CmdCreate"
   | CmdConnectSockAddr -> "CmdConnectSockAddr"
   | CmdConnectUnix -> "CmdConnectUnix"
+  | CmdConnectVSock -> "CmdConnectVSock"
   | CmdConnectTCP -> "CmdConnectTCP"
   | CmdConnectCommand -> "CmdConnectCommand"
   | CmdConnectSA -> "CmdConnectSA"
@@ -3173,6 +3219,7 @@ let c_string_of_external_event = function
   | CmdCreate -> "cmd_create"
   | CmdConnectSockAddr -> "cmd_connect_sockaddr"
   | CmdConnectUnix -> "cmd_connect_unix"
+  | CmdConnectVSock -> "cmd_connect_vsock"
   | CmdConnectTCP -> "cmd_connect_tcp"
   | CmdConnectCommand -> "cmd_connect_command"
   | CmdConnectSA -> "cmd_connect_sa"
@@ -3624,7 +3671,7 @@ let generate_lib_states_run_c () =
           | NotifyWrite -> pr "    r |= LIBNBD_AIO_DIRECTION_WRITE;\n"
           | CmdCreate
           | CmdConnectSockAddr
-          | CmdConnectUnix | CmdConnectTCP
+          | CmdConnectUnix | CmdConnectVSock | CmdConnectTCP
           | CmdConnectCommand | CmdConnectSA | CmdConnectSocket
           | CmdIssue -> ()
       ) events;
diff --git a/generator/states-connect.c b/generator/states-connect.c
index d62b0f5..e4658a7 100644
--- a/generator/states-connect.c
+++ b/generator/states-connect.c
@@ -37,6 +37,10 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
+#ifdef HAVE_LINUX_VM_SOCKETS_H
+#include <linux/vm_sockets.h>
+#endif
+
 /* Disable Nagle's algorithm on the socket, but don't fail. */
 static void
 disable_nagle (int sock)
@@ -118,6 +122,24 @@ STATE_MACHINE {
   SET_NEXT_STATE (%^CONNECT.START);
   return 0;
 
+ CONNECT_VSOCK.START:
+#ifdef AF_VSOCK
+  struct sockaddr_vm svm = {
+    .svm_family = AF_VSOCK,
+    .svm_cid = h->svm_cid,
+    .svm_port = h->svm_port,
+  };
+  const socklen_t len = sizeof svm;
+
+  memcpy (&h->connaddr, &svm, len);
+  h->connaddrlen = len;
+  SET_NEXT_STATE (%^CONNECT.START);
+  return 0;
+#else
+  set_error (ENOTSUP, "AF_VSOCK protocol is not supported");
+  SET_NEXT_STATE (%.DEAD);
+#endif
+
  CONNECT_TCP.START:
   int r;
 
diff --git a/lib/connect.c b/lib/connect.c
index a0ef5f1..d8bb121 100644
--- a/lib/connect.c
+++ b/lib/connect.c
@@ -92,6 +92,16 @@ nbd_unlocked_connect_unix (struct nbd_handle *h, const char *unixsocket)
   return wait_until_connected (h);
 }
 
+/* Connect to a vsock. */
+int
+nbd_unlocked_connect_vsock (struct nbd_handle *h, uint32_t cid, uint32_t port)
+{
+  if (nbd_unlocked_aio_connect_vsock (h, cid, port) == -1)
+    return -1;
+
+  return wait_until_connected (h);
+}
+
 /* Connect to a TCP port. */
 int
 nbd_unlocked_connect_tcp (struct nbd_handle *h,
@@ -388,6 +398,16 @@ nbd_unlocked_aio_connect_unix (struct nbd_handle *h, const char *unixsocket)
   return nbd_internal_run (h, cmd_connect_unix);
 }
 
+int
+nbd_unlocked_aio_connect_vsock (struct nbd_handle *h,
+                                uint32_t cid, uint32_t port)
+{
+  h->svm_cid = cid;
+  h->svm_port = port;
+
+  return nbd_internal_run (h, cmd_connect_vsock);
+}
+
 int
 nbd_unlocked_aio_connect_tcp (struct nbd_handle *h,
                               const char *hostname, const char *port)
diff --git a/lib/internal.h b/lib/internal.h
index 6433183..ba053c2 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -197,6 +197,9 @@ struct nbd_handle {
   /* When connecting to Unix domain socket. */
   char *unixsocket;
 
+  /* When connecting to a vsock. */
+  uint32_t svm_cid, svm_port;
+
   /* When connecting to TCP ports, these fields are used. */
   char *hostname, *port;
   struct addrinfo hints;
-- 
2.23.0
Richard W.M. Jones
2019-Oct-18  15:42 UTC
Re: [Libguestfs] [PATCH libnbd 0/2] api: Add support for AF_VSOCK.
On Fri, Oct 18, 2019 at 04:39:11PM +0100, Richard W.M. Jones wrote:
> $ dd if=/dev/random of=mp/nbd bs=1024 count=100 conv=notrunc,nocreat

Obviously didn't intend to use /dev/random there.  With /dev/zero it
gets about 50 MB/s, but I'm certain the performance problem is
entirely down to FUSE.  With libnbd directly it ought to be many times
faster.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-builder quickly builds VMs from scratch
http://libguestfs.org/virt-builder.1.html
Eric Blake
2019-Oct-18  15:49 UTC
Re: [Libguestfs] [PATCH libnbd 1/2] states: Don't assume socket address family is always AF_UNIX.
On 10/18/19 10:39 AM, Richard W.M. Jones wrote:
> Get the address family from h->connaddr instead.
>
> This should make no difference to existing code.
> ---
>  generator/states-connect.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>

ACK

> diff --git a/generator/states-connect.c b/generator/states-connect.c
> index 04e894c..d62b0f5 100644
> --- a/generator/states-connect.c
> +++ b/generator/states-connect.c
> @@ -51,7 +51,8 @@ STATE_MACHINE {
>    int fd;
>
>    assert (!h->sock);
> -  fd = socket (AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
> +  fd = socket (h->connaddr.ss_family,
> +               SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0);
>    if (fd == -1) {
>      SET_NEXT_STATE (%.DEAD);
>      set_error (errno, "socket");
>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org
Eric Blake
2019-Oct-18  15:58 UTC
Re: [Libguestfs] [PATCH libnbd 2/2] api: Add support for AF_VSOCK.
On 10/18/19 10:39 AM, Richard W.M. Jones wrote:
> This adds a new API for connecting to AF_VSOCK protocol
> (https://wiki.qemu.org/Features/VirtioVsock).
>
> For example:
>
>   nbd_connect_vsock (nbd, 2, 10809);
>
> There is no test of this feature because it only works between guest
> and host.  You cannot start a server and client on the host and talk
> between them, which is what we'd need to write a sane test.
> ---

> @@ -357,6 +366,23 @@ main (int argc, char *argv[])
>        exit (EXIT_FAILURE);
>      }
>      break;
> +
> +  case MODE_VSOCK:
> +    if (sscanf (argv[optind], "%" SCNu32, &cid) != 1) {

sscanf() can't detect overflow ;(

> +++ b/generator/generator

> +  "connect_vsock", {
> +    default_call with
> +    args = [ UInt32 "cid"; UInt32 "port" ]; ret = RErr;
> +    permitted_states = [ Created ];
> +    shortdesc = "connect to NBD server over AF_VSOCK protocol";
> +    longdesc = "\
> +Connect (synchronously) over the C<AF_VSOCK> protocol from a
> +virtual machine to an NBD server, usually running on the host.  The
> +C<cid> and C<port> parameters specify the server address.  Usually
> +C<cid> should be C<2> (to connect to the host), and C<port> might be
> +C<10809> or another port number assigned to you by the host
> +administrator.  This call returns when the connection has been made.";

You mentioned that right now, nbdkit has to be server on host, and
libnbd is client on guest.  But if we can let nbdkit specify a cid,
doesn't this mean we can run nbdkit as server in guest, and then
connect libnbd as client on host?  Then add 'nbdkit nbd vsock=...' to
let the nbdkit pass-through wrapper convert vsock from guest into TCP
or Unix socket on the host to other host clients that don't know how
to do vsock.

> @@ -2793,6 +2836,8 @@ let first_version = [
>    "aio_connect_systemd_socket_activation", (1, 2);
>    "connect_socket", (1, 2);
>    "aio_connect_socket", (1, 2);
> +  "connect_vsock", (1, 2);
> +  "aio_connect_vsock", (1, 2);
>

As this is Linux-only (and for that matter, depends on kernel vsock
support), we probably need "supports_vsock" as an additional function.

> +++ b/generator/states-connect.c
> @@ -37,6 +37,10 @@
>  #include <sys/socket.h>
>  #include <sys/un.h>
>
> +#ifdef HAVE_LINUX_VM_SOCKETS_H
> +#include <linux/vm_sockets.h>
> +#endif
> +
>  /* Disable Nagle's algorithm on the socket, but don't fail. */
>  static void
>  disable_nagle (int sock)
> @@ -118,6 +122,24 @@ STATE_MACHINE {
>    SET_NEXT_STATE (%^CONNECT.START);
>    return 0;
>
> + CONNECT_VSOCK.START:
> +#ifdef AF_VSOCK
> +  struct sockaddr_vm svm = {
> +    .svm_family = AF_VSOCK,
> +    .svm_cid = h->svm_cid,
> +    .svm_port = h->svm_port,
> +  };

Are there scenarios (mismatch in kernel vs. headers compiled against,
for instance) where compilation says AF_VSOCK exists but where all
attempts at vsock fail?  If so, is there anything that we should check
dynamically, rather than just compile-time presence of AF_VSOCK?

But if nothing else, having:

int libnbd_internal_supports_vsock(struct nbd_handle*h)
{
#ifdef AF_VSOCK
  return 1;
#else
  return 0;
#endif
}

is worth having.

Otherwise, the idea is pretty cool!

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3226
Virtualization:  qemu.org | libvirt.org