Richard W.M. Jones
2019-Sep-26 16:40 UTC
[Libguestfs] [PATCH libnbd 1/2] lib: Avoid killing subprocess twice.
If the user calls nbd_kill_subprocess, we shouldn't kill the process again when we close the handle (since the process has likely gone and we might be killing a different process). --- lib/handle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/handle.c b/lib/handle.c index 2af25fe..5ad818e 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -315,6 +315,8 @@ nbd_unlocked_kill_subprocess (struct nbd_handle *h, int signum) return -1; } + h->pid = -1; + return 0; } -- 2.23.0
Richard W.M. Jones
2019-Sep-26 16:40 UTC
[Libguestfs] [PATCH libnbd 2/2] api: Implement local command with systemd socket activation.
This adds new APIs for running a local NBD server and connecting to it using systemd socket activation (instead of stdin/stdout). This includes interop tests against nbdkit and qemu-nbd which I believe are the only NBD servers supporting socket activation. (If we find others then we can add more interop tests in future.) The upstream spec for systemd socket activation is here: http://0pointer.de/blog/projects/socket-activation.html --- .gitignore | 2 + TODO | 2 - generator/generator | 53 +++++++++++++++++- generator/states-connect.c | 109 ++++++++++++++++++++++++++++++++++++ interop/Makefile.am | 22 ++++++++ interop/socket-activation.c | 92 ++++++++++++++++++++++++++++++ lib/connect.c | 28 +++++++++ lib/handle.c | 10 ++++ lib/internal.h | 6 ++ 9 files changed, 319 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index dd8a052..99fce5c 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,8 @@ Makefile.in /interop/interop-qemu-nbd /interop/interop-qemu-nbd-tls-certs /interop/interop-qemu-nbd-tls-psk +/interop/socket-activation-nbdkit +/interop/socket-activation-qemu-nbd /interop/structured-read /lib/api.c /lib/libnbd.pc diff --git a/TODO b/TODO index 642d39f..92e2c47 100644 --- a/TODO +++ b/TODO @@ -17,8 +17,6 @@ NBD_INFO_BLOCK_SIZE. TLS should properly shut down the session (calling gnutls_bye). -Implement nbd_connect + systemd socket activation. - Improve function trace output so that: - Long strings are truncated. - Strings with non-printable characters are escaped. diff --git a/generator/generator b/generator/generator index 3b63665..d0b4d46 100755 --- a/generator/generator +++ b/generator/generator @@ -95,6 +95,7 @@ type external_event | CmdConnectUnix (* [nbd_aio_connect_unix] *) | CmdConnectTCP (* [nbd_aio_connect_tcp] *) | CmdConnectCommand (* [nbd_aio_connect_command] *) + | CmdConnectSA (* [nbd_aio_connect_socket_activation] *) | CmdIssue (* issuing an NBD command *) type location = string * int (* source location: file, line number *) @@ -168,13 +169,15 @@ let rec state_machine = [ CmdConnectSockAddr, "CONNECT.START"; CmdConnectUnix, "CONNECT_UNIX.START"; CmdConnectTCP, "CONNECT_TCP.START"; - CmdConnectCommand, "CONNECT_COMMAND.START" ]; + CmdConnectCommand, "CONNECT_COMMAND.START"; + CmdConnectSA, "CONNECT_SA.START" ]; }; Group ("CONNECT", connect_state_machine); Group ("CONNECT_UNIX", connect_unix_state_machine); Group ("CONNECT_TCP", connect_tcp_state_machine); Group ("CONNECT_COMMAND", connect_command_state_machine); + Group ("CONNECT_SA", connect_sa_state_machine); Group ("MAGIC", magic_state_machine); Group ("OLDSTYLE", oldstyle_state_machine); @@ -272,6 +275,16 @@ and connect_command_state_machine = [ }; ] +(* State machine implementing [nbd_aio_connect_socket_activation]. *) +and connect_sa_state_machine = [ + State { + default_state with + name = "START"; + comment = "Connect to a subprocess with systemd socket activation"; + external_events = []; + }; +] + (* Parse initial magic string from the server. *) and magic_state_machine = [ State { @@ -1501,6 +1514,18 @@ behave like inetd clients, such as C<nbdkit --single>."; example = Some "examples/connect-command.c"; }; + "connect_socket_activation", { + default_call with + args = [ StringList "argv" ]; ret = RErr; + permitted_states = [ Created ]; + shortdesc = "connect using systemd socket activation"; + longdesc = "\ +Run the command as a subprocess and connect to it using +systemd socket activation. This is for use with NBD servers +such as C<nbdkit> which understand socket activation."; + see_also = ["L<nbd_kill_subprocess(3)>"]; + }; + "is_read_only", { default_call with args = []; ret = RBool; @@ -2093,6 +2118,22 @@ Run the command as a subprocess and begin connecting to it over stdin/stdout. Parameters behave as documented in L<nbd_connect_command(3)>. +You can check if the connection is still connecting by calling +L<nbd_aio_is_connecting(3)>, or if it has connected to the server +and completed the NBD handshake by calling L<nbd_aio_is_ready(3)>, +on the connection."; + }; + + "aio_connect_socket_activation", { + default_call with + args = [ StringList "argv" ]; ret = RErr; + permitted_states = [ Created ]; + shortdesc = "connect using systemd socket activation"; + longdesc = "\ +Run the command as a subprocess and begin connecting to it using +systemd socket activation. Parameters behave as documented in +L<nbd_connect_socket_activation(3)>. + You can check if the connection is still connecting by calling L<nbd_aio_is_connecting(3)>, or if it has connected to the server and completed the NBD handshake by calling L<nbd_aio_is_ready(3)>, @@ -2678,6 +2719,8 @@ let first_version = [ "get_protocol", (1, 2); "set_handshake_flags", (1, 2); "get_handshake_flags", (1, 2); + "connect_socket_activation", (1, 2); + "aio_connect_socket_activation", (1, 2); (* These calls are proposed for a future version of libnbd, but * have not been added to any released version so far. @@ -3035,7 +3078,8 @@ end = struct let all_external_events [NotifyRead; NotifyWrite; CmdCreate; - CmdConnectSockAddr; CmdConnectUnix; CmdConnectTCP; CmdConnectCommand; + CmdConnectSockAddr; CmdConnectUnix; CmdConnectTCP; + CmdConnectCommand; CmdConnectSA; CmdIssue] let string_of_external_event = function @@ -3046,6 +3090,7 @@ let string_of_external_event = function | CmdConnectUnix -> "CmdConnectUnix" | CmdConnectTCP -> "CmdConnectTCP" | CmdConnectCommand -> "CmdConnectCommand" + | CmdConnectSA -> "CmdConnectSA" | CmdIssue -> "CmdIssue" let c_string_of_external_event = function @@ -3056,6 +3101,7 @@ let c_string_of_external_event = function | CmdConnectUnix -> "cmd_connect_unix" | CmdConnectTCP -> "cmd_connect_tcp" | CmdConnectCommand -> "cmd_connect_command" + | CmdConnectSA -> "cmd_connect_sa" | CmdIssue -> "cmd_issue" (* Find a state in the state machine hierarchy by path. The [path] @@ -3484,7 +3530,8 @@ let generate_lib_states_c () | NotifyWrite -> pr " r |= LIBNBD_AIO_DIRECTION_WRITE;\n" | CmdCreate | CmdConnectSockAddr - | CmdConnectUnix | CmdConnectTCP | CmdConnectCommand + | CmdConnectUnix | CmdConnectTCP + | CmdConnectCommand | CmdConnectSA | CmdIssue -> () ) events; pr " break;\n"; diff --git a/generator/states-connect.c b/generator/states-connect.c index e9b3582..51b907e 100644 --- a/generator/states-connect.c +++ b/generator/states-connect.c @@ -37,6 +37,9 @@ #include <sys/socket.h> #include <sys/un.h> +/* This is baked into the systemd socket activation API. */ +#define FIRST_SOCKET_ACTIVATION_FD 3 + /* Disable Nagle's algorithm on the socket, but don't fail. */ static void disable_nagle (int sock) @@ -292,4 +295,110 @@ disable_nagle (int sock) SET_NEXT_STATE (%^MAGIC.START); return 0; + CONNECT_SA.START: + /* This is in some ways similar to nbd_aio_connect_command above, + * but we must pass a listening socket to the child process and + * we must set LISTEN_PID and LISTEN_FDS environment variables. + */ + size_t len; + int s; + struct sockaddr_un addr; + pid_t pid; + int flags; + char listen_pid[16]; + + assert (!h->sock); + assert (h->argv); + assert (h->argv[0]); + + /* Use /tmp instead of TMPDIR because we must ensure the path is + * short enough to store in the sockaddr_un. On some platforms this + * may cause problems so we may need to revisit it. XXX + */ + h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX"); + h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock"); + if (h->sa_tmpdir == NULL || h->sa_sockpath == NULL) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "strdup"); + return 0; + } + + if (mkdtemp (h->sa_tmpdir) == NULL) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "mkdtemp"); + return 0; + } + len = strlen (h->sa_tmpdir); + memcpy (h->sa_sockpath, h->sa_tmpdir, len); + + s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (s == -1) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "socket"); + return 0; + } + + addr.sun_family = AF_UNIX; + memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1); + if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "bind: %s", h->sa_sockpath); + return 0; + } + + if (listen (s, 1) == -1) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "listen"); + return 0; + } + + pid = fork (); + if (pid == -1) { + SET_NEXT_STATE (%.DEAD); + set_error (errno, "fork"); + close (s); + return 0; + } + if (pid == 0) { /* child - run command */ + if (s != FIRST_SOCKET_ACTIVATION_FD) { + dup2 (s, FIRST_SOCKET_ACTIVATION_FD); + close (s); + } + else { + /* We must unset CLOEXEC on the fd. (dup2 above does this + * implicitly because CLOEXEC is set on the fd, not on the + * socket). + */ + flags = fcntl (s, F_GETFD, 0); + if (flags == -1) { + perror ("fcntl: F_GETFD"); + _exit (EXIT_FAILURE); + } + if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) { + perror ("fcntl: F_SETFD"); + _exit (EXIT_FAILURE); + } + } + + snprintf (listen_pid, sizeof listen_pid, "%ld", (long) getpid ()); + setenv ("LISTEN_PID", listen_pid, 1); + setenv ("LISTEN_FDS", "1", 1); + + /* Restore SIGPIPE back to SIG_DFL. */ + signal (SIGPIPE, SIG_DFL); + + execvp (h->argv[0], h->argv); + perror (h->argv[0]); + _exit (EXIT_FAILURE); + } + + /* Parent. */ + close (s); + h->pid = pid; + + h->connaddrlen = sizeof addr; + memcpy (&h->connaddr, &addr, h->connaddrlen); + SET_NEXT_STATE (%^CONNECT.START); + return 0; + } /* END STATE MACHINE */ diff --git a/interop/Makefile.am b/interop/Makefile.am index 43350a8..d30cdf1 100644 --- a/interop/Makefile.am +++ b/interop/Makefile.am @@ -48,11 +48,13 @@ if HAVE_QEMU_NBD check_PROGRAMS += \ interop-qemu-nbd \ dirty-bitmap \ + socket-activation-qemu-nbd \ structured-read \ $(NULL) TESTS += \ interop-qemu-nbd \ dirty-bitmap.sh \ + socket-activation-qemu-nbd \ structured-read.sh \ $(NULL) @@ -124,6 +126,15 @@ dirty_bitmap_CPPFLAGS = -I$(top_srcdir)/include dirty_bitmap_CFLAGS = $(WARNINGS_CFLAGS) dirty_bitmap_LDADD = $(top_builddir)/lib/libnbd.la +socket_activation_qemu_nbd_SOURCES = socket-activation.c +socket_activation_qemu_nbd_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -DSERVER=\"$(QEMU_NBD)\" \ + -DSERVER_PARAMS='"-f", "raw", "-x", "", tmpfile' \ + $(NULL) +socket_activation_qemu_nbd_CFLAGS = $(WARNINGS_CFLAGS) +socket_activation_qemu_nbd_LDADD = $(top_builddir)/lib/libnbd.la + structured_read_SOURCES = structured-read.c structured_read_CPPFLAGS = -I$(top_srcdir)/include structured_read_CFLAGS = $(WARNINGS_CFLAGS) @@ -135,9 +146,11 @@ if HAVE_NBDKIT check_PROGRAMS += \ interop-nbdkit \ + socket-activation-nbdkit \ $(NULL) TESTS += \ interop-nbdkit \ + socket-activation-nbdkit \ $(NULL) # See above comment about files in ../tests/Makefile.am @@ -245,6 +258,15 @@ interop_nbdkit_tls_psk_allow_fallback_CPPFLAGS = \ interop_nbdkit_tls_psk_allow_fallback_CFLAGS = $(WARNINGS_CFLAGS) interop_nbdkit_tls_psk_allow_fallback_LDADD = $(top_builddir)/lib/libnbd.la +socket_activation_nbdkit_SOURCES = socket-activation.c +socket_activation_nbdkit_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -DSERVER=\"$(NBDKIT)\" \ + -DSERVER_PARAMS='"file", tmpfile' \ + $(NULL) +socket_activation_nbdkit_CFLAGS = $(WARNINGS_CFLAGS) +socket_activation_nbdkit_LDADD = $(top_builddir)/lib/libnbd.la + endif HAVE_NBDKIT check-valgrind: diff --git a/interop/socket-activation.c b/interop/socket-activation.c new file mode 100644 index 0000000..db6f6ca --- /dev/null +++ b/interop/socket-activation.c @@ -0,0 +1,92 @@ +/* NBD client library in userspace + * Copyright (C) 2019 Red Hat Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Test interop with nbdkit or qemu-nbd, and systemd socket activation. */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <unistd.h> + +#include <libnbd.h> + +#define SIZE 1048576 + +int +main (int argc, char *argv[]) +{ + struct nbd_handle *nbd; + char tmpfile[] = "/tmp/nbdXXXXXX"; + int fd; + int64_t actual_size; + char buf[512]; + int r = -1; + + /* Create a large sparse temporary file. */ + fd = mkstemp (tmpfile); + if (fd == -1 || + ftruncate (fd, SIZE) == -1 || + close (fd) == -1) { + perror (tmpfile); + goto out; + } + + nbd = nbd_create (); + if (nbd == NULL) { + fprintf (stderr, "%s\n", nbd_get_error ()); + goto out; + } + + char *args[] = { SERVER, SERVER_PARAMS, NULL }; + if (nbd_connect_socket_activation (nbd, args) == -1) { + fprintf (stderr, "%s\n", nbd_get_error ()); + goto out; + } + + actual_size = nbd_get_size (nbd); + if (actual_size == -1) { + fprintf (stderr, "%s\n", nbd_get_error ()); + goto out; + } + if (actual_size != SIZE) { + fprintf (stderr, "%s: actual size %" PRIi64 " <> expected size %d", + argv[0], actual_size, SIZE); + goto out; + } + + if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) { + fprintf (stderr, "%s\n", nbd_get_error ()); + goto out; + } + + if (nbd_shutdown (nbd, 0) == -1) { + fprintf (stderr, "%s\n", nbd_get_error ()); + goto out; + } + + nbd_close (nbd); + + r = 0; + out: + unlink (tmpfile); + + exit (r == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/lib/connect.c b/lib/connect.c index f98bcdb..c1cbef7 100644 --- a/lib/connect.c +++ b/lib/connect.c @@ -112,6 +112,16 @@ nbd_unlocked_connect_command (struct nbd_handle *h, char **argv) return wait_until_connected (h); } +/* Connect to a local command, use systemd socket activation. */ +int +nbd_unlocked_connect_socket_activation (struct nbd_handle *h, char **argv) +{ + if (nbd_unlocked_aio_connect_socket_activation (h, argv) == -1) + return -1; + + return wait_until_connected (h); +} + int nbd_unlocked_aio_connect (struct nbd_handle *h, const struct sockaddr *addr, socklen_t len) @@ -405,3 +415,21 @@ nbd_unlocked_aio_connect_command (struct nbd_handle *h, char **argv) return nbd_internal_run (h, cmd_connect_command); } + +int +nbd_unlocked_aio_connect_socket_activation (struct nbd_handle *h, char **argv) +{ + char **copy; + + copy = nbd_internal_copy_string_list (argv); + if (!copy) { + set_error (errno, "copy_string_list"); + return -1; + } + + if (h->argv) + nbd_internal_free_string_list (h->argv); + h->argv = copy; + + return nbd_internal_run (h, cmd_connect_sa); +} diff --git a/lib/handle.c b/lib/handle.c index 5ad818e..18cc8d0 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h) free_cmd_list (h->cmds_in_flight); free_cmd_list (h->cmds_done); nbd_internal_free_string_list (h->argv); + if (h->sa_sockpath) { + if (h->pid > 0) + kill (h->pid, SIGTERM); + unlink (h->sa_sockpath); + free (h->sa_sockpath); + } + if (h->sa_tmpdir) { + rmdir (h->sa_tmpdir); + free (h->sa_tmpdir); + } free (h->unixsocket); free (h->hostname); free (h->port); diff --git a/lib/internal.h b/lib/internal.h index bdb0e83..bf8b9aa 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -188,6 +188,12 @@ struct nbd_handle { char **argv; pid_t pid; + /* When using systemd socket activation, this directory and socket + * must be deleted, and the pid above must be killed. + */ + char *sa_tmpdir; + char *sa_sockpath; + /* When connecting to Unix domain socket. */ char *unixsocket; -- 2.23.0
Eric Blake
2019-Sep-26 18:42 UTC
Re: [Libguestfs] [PATCH libnbd 1/2] lib: Avoid killing subprocess twice.
On 9/26/19 11:40 AM, Richard W.M. Jones wrote:> If the user calls nbd_kill_subprocess, we shouldn't kill the process > again when we close the handle (since the process has likely gone and > we might be killing a different process). > --- > lib/handle.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/lib/handle.c b/lib/handle.c > index 2af25fe..5ad818e 100644 > --- a/lib/handle.c > +++ b/lib/handle.c > @@ -315,6 +315,8 @@ nbd_unlocked_kill_subprocess (struct nbd_handle *h, int signum) > return -1; > } > > + h->pid = -1; > +Ouch - this means we completely forget about the child process, even if signum == SIGHUP and was meant merely to get the server to reload state rather than to kill it (we've talked about making nbdkit have a way to reload configuration); and prevents a client from using the API a second time to send a more severe signal like SIGKILL if the first didn't have the desired effect. You are right, however, that once this is called, we have to be more careful that any future interaction with the pid does not race with a scenario where our child has gone away, and some other process come into its place. Is it viable to check /proc to see if the child process is the same one that we spawned, for example, /proc/CHILD/status looking to see if PPid: still points to us? -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Richard W.M. Jones
2019-Sep-26 19:44 UTC
Re: [Libguestfs] [PATCH libnbd 1/2] lib: Avoid killing subprocess twice.
On Thu, Sep 26, 2019 at 01:42:19PM -0500, Eric Blake wrote:> On 9/26/19 11:40 AM, Richard W.M. Jones wrote: > >If the user calls nbd_kill_subprocess, we shouldn't kill the process > >again when we close the handle (since the process has likely gone and > >we might be killing a different process). > >--- > > lib/handle.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > >diff --git a/lib/handle.c b/lib/handle.c > >index 2af25fe..5ad818e 100644 > >--- a/lib/handle.c > >+++ b/lib/handle.c > >@@ -315,6 +315,8 @@ nbd_unlocked_kill_subprocess (struct nbd_handle *h, int signum) > > return -1; > > } > >+ h->pid = -1; > >+ > > Ouch - this means we completely forget about the child process, even > if signum == SIGHUP and was meant merely to get the server to reload > state rather than to kill it (we've talked about making nbdkit have > a way to reload configuration); and prevents a client from using the > API a second time to send a more severe signal like SIGKILL if the > first didn't have the desired effect. > > You are right, however, that once this is called, we have to be more > careful that any future interaction with the pid does not race with > a scenario where our child has gone away, and some other process > come into its place. Is it viable to check /proc to see if the > child process is the same one that we spawned, for example, > /proc/CHILD/status looking to see if PPid: still points to us?Yes, although that wouldn't work on FreeBSD. The patch as I wrote it is wrong (as I think you imply above) because it leaves a zombie child around, so a long-running libnbd-using process will accumulate zombies. Firstly my opinion is we shouldn't worry too much about reloading the configuration, because it seems like it would be much more reliable to create a new handle + command. However sending signals of increasing severity does seem like something a caller would want to do. It seems rather hard to cope with all this within the existing API, so possibly we need new API(s) here ... Also pidfd's would help greatly if they were more widely available ...... Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://people.redhat.com/~rjones/virt-top
Eric Blake
2019-Sep-26 20:56 UTC
Re: [Libguestfs] [PATCH libnbd 2/2] api: Implement local command with systemd socket activation.
On 9/26/19 11:40 AM, Richard W.M. Jones wrote:> This adds new APIs for running a local NBD server and connecting to it > using systemd socket activation (instead of stdin/stdout). > > This includes interop tests against nbdkit and qemu-nbd which I > believe are the only NBD servers supporting socket activation. (If we > find others then we can add more interop tests in future.) > > The upstream spec for systemd socket activation is here: > http://0pointer.de/blog/projects/socket-activation.html > ---> +++ b/generator/states-connect.c > @@ -37,6 +37,9 @@ > #include <sys/socket.h> > #include <sys/un.h> > > +/* This is baked into the systemd socket activation API. */ > +#define FIRST_SOCKET_ACTIVATION_FD 3 > + > /* Disable Nagle's algorithm on the socket, but don't fail. */ > static void > disable_nagle (int sock) > @@ -292,4 +295,110 @@ disable_nagle (int sock) > SET_NEXT_STATE (%^MAGIC.START); > return 0; > > + CONNECT_SA.START: > + /* This is in some ways similar to nbd_aio_connect_command above, > + * but we must pass a listening socket to the child process and > + * we must set LISTEN_PID and LISTEN_FDS environment variables. > + */ > + size_t len; > + int s; > + struct sockaddr_un addr; > + pid_t pid; > + int flags; > + char listen_pid[16]; > + > + assert (!h->sock); > + assert (h->argv); > + assert (h->argv[0]); > + > + /* Use /tmp instead of TMPDIR because we must ensure the path is > + * short enough to store in the sockaddr_un. On some platforms this > + * may cause problems so we may need to revisit it. XXX > + */Is the use of socketpair() any better than creating a socket under /tmp?> + h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX"); > + h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock"); > + if (h->sa_tmpdir == NULL || h->sa_sockpath == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "strdup"); > + return 0; > + } > + > + if (mkdtemp (h->sa_tmpdir) == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "mkdtemp"); > + return 0; > + } > + len = strlen (h->sa_tmpdir); > + memcpy (h->sa_sockpath, h->sa_tmpdir, len); > + > + s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); > + if (s == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "socket"); > + return 0; > + }If we fail here or later, do/should we try to clean up the /tmp/libnbdXXX directory created earlier? /me reads ahead - nbd_close tries to address it Still, if we fail at this point, h->sa_sockpath is set but not yet created [1]> + > + addr.sun_family = AF_UNIX; > + memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1); > + if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "bind: %s", h->sa_sockpath); > + return 0; > + } > + > + if (listen (s, 1) == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "listen"); > + return 0; > + } > + > + pid = fork (); > + if (pid == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "fork"); > + close (s); > + return 0; > + } > + if (pid == 0) { /* child - run command */ > + if (s != FIRST_SOCKET_ACTIVATION_FD) { > + dup2 (s, FIRST_SOCKET_ACTIVATION_FD);No check for errors? But that's probably okay in this instance (we know that we won't fail with EBADF, for example).> + close (s); > + } > + else { > + /* We must unset CLOEXEC on the fd. (dup2 above does this > + * implicitly because CLOEXEC is set on the fd, not on the > + * socket). > + */ > + flags = fcntl (s, F_GETFD, 0); > + if (flags == -1) { > + perror ("fcntl: F_GETFD");perror is not async-signal-safe. Calling it in a child of a potentially-multi-threaded parent is therefore prone to deadlock, if perror() triggers a request to grab any resource that was left locked by a different thread holding the lock but stranded by the fork.> + _exit (EXIT_FAILURE); > + } > + if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) { > + perror ("fcntl: F_SETFD"); > + _exit (EXIT_FAILURE); > + }About all we can _safely_ do is let our _exit() status inform the parent process that something failed, and let the parent process print the error message. But even passing errno as the _exit() value is not portable (GNU Hurd errno values are intentionally larger than what fit in 8-bit returns that the parent would see). It's also possible to set up a cloexec pipe (in addition to everything else) so that the child can write() error details into the pipe, but that's a lot of effort compared to just blindly stating that the child failed but declaring full details of why as lost.> + } > + > + snprintf (listen_pid, sizeof listen_pid, "%ld", (long) getpid ()); > + setenv ("LISTEN_PID", listen_pid, 1); > + setenv ("LISTEN_FDS", "1", 1);snprintf() and setenv() are also not async-signal-safe. Which is a bummer, since you really don't know the child pid until in the child. You could open-code the translation of a decimal number into a buffer without snprintf, but you also have to figure out how to safely put it into the environ seen by the child. How does systemd do it? We _could_ create a helper executable, where we fork() into the child process, exec() the helper to overwrite the child pid with something that is no longer constrained by the limits of async-safety, so that the helper app can then use snprintf/setenv at will before re-execing again with the real target executable. The next question is how worried are we about actually being hit by an non-async-signal-safe hang. What you have works 99.9% of the time, but if we are trying to be a library that is usable by any application, we have to decide if that is good enough.> + > + /* Restore SIGPIPE back to SIG_DFL. */ > + signal (SIGPIPE, SIG_DFL); > + > + execvp (h->argv[0], h->argv); > + perror (h->argv[0]);and another unsafe perror.> + _exit (EXIT_FAILURE);Is EXIT_FAILURE the best here, or should we consider exiting with status 126/127 to match what other programs (sh, env, nice, nohup, ...) do when execvp() fails?> + } > + > + /* Parent. */ > + close (s); > + h->pid = pid; > + > + h->connaddrlen = sizeof addr; > + memcpy (&h->connaddr, &addr, h->connaddrlen); > + SET_NEXT_STATE (%^CONNECT.START); > + return 0; > + > } /* END STATE MACHINE */> +++ b/interop/socket-activation.c> +#define SIZE 1048576 > + > +int > +main (int argc, char *argv[]) > +{ > + struct nbd_handle *nbd; > + char tmpfile[] = "/tmp/nbdXXXXXX"; > + int fd; > + int64_t actual_size; > + char buf[512]; > + int r = -1; > + > + /* Create a large sparse temporary file. */ > + fd = mkstemp (tmpfile); > + if (fd == -1 || > + ftruncate (fd, SIZE) == -1 || > + close (fd) == -1) { > + perror (tmpfile); > + goto out; > + }Is it worth populating anything in the temp file...> + > + nbd = nbd_create (); > + if (nbd == NULL) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + goto out; > + } > + > + char *args[] = { SERVER, SERVER_PARAMS, NULL }; > + if (nbd_connect_socket_activation (nbd, args) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + goto out; > + } > + > + actual_size = nbd_get_size (nbd); > + if (actual_size == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + goto out; > + } > + if (actual_size != SIZE) { > + fprintf (stderr, "%s: actual size %" PRIi64 " <> expected size %d", > + argv[0], actual_size, SIZE); > + goto out; > + } > + > + if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + goto out; > + }...and checking that we read it back out? But even a successful read without checking contents is pretty good evidence that the socket activation worked.> + > + if (nbd_shutdown (nbd, 0) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + goto out; > + } > + > + nbd_close (nbd); > + > + r = 0; > + out: > + unlink (tmpfile); > + > + exit (r == 0 ? EXIT_SUCCESS : EXIT_FAILURE); > +} > diff --git a/lib/connect.c b/lib/connect.c > index f98bcdb..c1cbef7 100644 > --- a/lib/connect.c> +++ b/lib/handle.c > @@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h) > free_cmd_list (h->cmds_in_flight); > free_cmd_list (h->cmds_done); > nbd_internal_free_string_list (h->argv); > + if (h->sa_sockpath) { > + if (h->pid > 0) > + kill (h->pid, SIGTERM);Are we sure that SIGTERM Is always going to be sufficient? Or do we need a tiered approach where we try SIGTERM, but followup with SIGKILL if too much time elapses? I guess it is the same situation for nbd_connect_command, does that mean we should have a common helper function for doing appropriate cleanup? Do we document that nbd_close() may block?> + unlink (h->sa_sockpath);[1] we can end up calling unlink() on a path that does not exist. But since we aren't checking error status here, we are effectively ignoring the ENOENT in that case.> + free (h->sa_sockpath); > + } > + if (h->sa_tmpdir) { > + rmdir (h->sa_tmpdir); > + free (h->sa_tmpdir); > + } > free (h->unixsocket); > free (h->hostname); > free (h->port); > diff --git a/lib/internal.h b/lib/internal.h > index bdb0e83..bf8b9aa 100644 > --- a/lib/internal.h > +++ b/lib/internal.h > @@ -188,6 +188,12 @@ struct nbd_handle { > char **argv; > pid_t pid; > > + /* When using systemd socket activation, this directory and socket > + * must be deleted, and the pid above must be killed. > + */ > + char *sa_tmpdir; > + char *sa_sockpath; > + > /* When connecting to Unix domain socket. */ > char *unixsocket; > >-- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Eric Blake
2019-Sep-26 21:22 UTC
Re: [Libguestfs] [PATCH libnbd 2/2] api: Implement local command with systemd socket activation.
On 9/26/19 11:40 AM, Richard W.M. Jones wrote:> This adds new APIs for running a local NBD server and connecting to it > using systemd socket activation (instead of stdin/stdout). > > This includes interop tests against nbdkit and qemu-nbd which I > believe are the only NBD servers supporting socket activation. (If we > find others then we can add more interop tests in future.) > > The upstream spec for systemd socket activation is here: > http://0pointer.de/blog/projects/socket-activation.html > ---> + > + /* Use /tmp instead of TMPDIR because we must ensure the path is > + * short enough to store in the sockaddr_un. On some platforms this > + * may cause problems so we may need to revisit it. XXX > + */ > + h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX"); > + h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock"); > + if (h->sa_tmpdir == NULL || h->sa_sockpath == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "strdup"); > + return 0; > + } > + > + if (mkdtemp (h->sa_tmpdir) == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "mkdtemp"); > + return 0; > + } > + len = strlen (h->sa_tmpdir); > + memcpy (h->sa_sockpath, h->sa_tmpdir, len); > + > + s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); > + if (s == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "socket"); > + return 0; > + } > + > + addr.sun_family = AF_UNIX; > + memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1);What if we used the abstract socket namespace instead? Then we don't have to worry about mkdtmp or cleanup of the socket file. True, that may only work on Linux, but we could add an API to query if we support socket activation (true on Linux, false on BSD)... -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Seemingly Similar Threads
- [PATCH libnbd v2 0/2] Implement systemd socket activation.
- [PATCH libnbd 0/2] api: Add support for AF_VSOCK.
- [PATCH libnbd 1/4] generator: Allow long ‘name - shortdesc’ in man pages.
- [PATCH libnbd 2/2] api: Implement local command with systemd socket activation.
- [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.