Richard W.M. Jones
2019-Sep-30  16:32 UTC
[Libguestfs] [PATCH libnbd v2 0/2] Implement systemd socket activation.
v1 was posted here: https://www.redhat.com/archives/libguestfs/2019-September/thread.html#00337 v2: - Drop the first patch. - Hopefully fix the multiple issues with fork-safety and general behaviour on error paths. Note this requires execvpe for which there seems to be no equivalent on FreeBSD, except some kind of tedious path parsing (but can we assign to environ?) Rich.
Richard W.M. Jones
2019-Sep-30  16:32 UTC
[Libguestfs] [PATCH libnbd v2 1/2] lib: Don't use perror after fork in nbd_connect_callback.
perror is not fork-safe and so could deadlock.  Instead open code a
fork-safe version of perror.  While this fixes the current behaviour,
in the long term we'd like to capture the error message into the usual
error mechanism, so this is not the full and final fix for this issue.
Also this fixes the exit code to be 126/127 instead of 1.
Thanks: Eric Blake
---
 TODO                       |  1 +
 configure.ac               | 10 ++++++
 generator/states-connect.c |  7 ++--
 lib/internal.h             |  2 ++
 lib/utils.c                | 66 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 84 insertions(+), 2 deletions(-)
diff --git a/TODO b/TODO
index 6c03736..2f23a34 100644
--- a/TODO
+++ b/TODO
@@ -57,3 +57,4 @@ Suggested API improvements:
   - it should be possible to use nbd_close and never block, so
     maybe nbd_shutdown should wait for the subprocess or there
     should be another API to do this
+  - capture error message when nbd_connect_command fails
diff --git a/configure.ac b/configure.ac
index 6296ccf..35a4fed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -77,6 +77,16 @@ AC_CHECK_HEADERS([\
     stdatomic.h \
     sys/endian.h])
 
+dnl Check for sys_errlist (optional).
+AC_MSG_CHECKING([for sys_errlist])
+AC_TRY_LINK([], [extern int sys_errlist; char *p = &sys_errlist;], [
+    AC_DEFINE([HAVE_SYS_ERRLIST], [1],
+              [Define if libc has a sys_errlist variable.])
+    AC_MSG_RESULT([yes])
+], [
+    AC_MSG_RESULT([no])
+])
+
 dnl Check for GnuTLS (optional, for TLS support).
 AC_ARG_WITH([gnutls],
     [AS_HELP_STRING([--without-gnutls],
diff --git a/generator/states-connect.c b/generator/states-connect.c
index 7a828f4..04e894c 100644
--- a/generator/states-connect.c
+++ b/generator/states-connect.c
@@ -259,8 +259,11 @@ STATE_MACHINE {
     signal (SIGPIPE, SIG_DFL);
 
     execvp (h->argv[0], h->argv);
-    perror (h->argv[0]);
-    _exit (EXIT_FAILURE);
+    nbd_internal_fork_safe_perror (h->argv[0]);
+    if (errno == ENOENT)
+      _exit (127);
+    else
+      _exit (126);
   }
 
   /* Parent. */
diff --git a/lib/internal.h b/lib/internal.h
index bdb0e83..31bc3d4 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -384,5 +384,7 @@ extern void nbd_internal_hexdump (const void *data, size_t
len, FILE *fp);
 extern size_t nbd_internal_string_list_length (char **argv);
 extern char **nbd_internal_copy_string_list (char **argv);
 extern void nbd_internal_free_string_list (char **argv);
+extern const char *nbd_internal_fork_safe_itoa (long v, char *buf, size_t len);
+extern void nbd_internal_fork_safe_perror (const char *s);
 
 #endif /* LIBNBD_INTERNAL_H */
diff --git a/lib/utils.c b/lib/utils.c
index 2d7fbf3..644781b 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -20,7 +20,10 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
 #include <ctype.h>
+#include <errno.h>
 
 #include "internal.h"
 
@@ -96,3 +99,66 @@ nbd_internal_free_string_list (char **argv)
     free (argv[i]);
   free (argv);
 }
+
+/* Like sprintf (s, "%ld", v).  The caller must supply a scratch
+ * buffer which is large enough for the result (32 bytes is fine), but
+ * note that the returned string does not point to the start of this
+ * buffer.
+ */
+const char *
+nbd_internal_fork_safe_itoa (long v, char *buf, size_t bufsize)
+{
+  size_t i = bufsize - 1;
+  bool neg = false;
+
+  buf[i--] = '\0';
+  if (v < 0) {
+    neg = true;
+    v = -v;                     /* XXX fails for INT_MIN */
+  }
+  if (v == 0)
+    buf[i--] = '0';
+  else {
+    while (v) {
+      buf[i--] = '0' + (v % 10);
+      v /= 10;
+    }
+  }
+  if (neg)
+    buf[i--] = '-';
+
+  i++;
+  return &buf[i];
+}
+
+/* Fork-safe version of perror.  ONLY use this after fork and before
+ * exec, the rest of the time use set_error().
+ */
+void
+nbd_internal_fork_safe_perror (const char *s)
+{
+  const int err = errno;
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+#endif
+  write (2, s, strlen (s));
+  write (2, ": ", 2);
+#if HAVE_SYS_ERRLIST
+  write (2, sys_errlist[errno], strlen (sys_errlist[errno]));
+#else
+  char buf[32];
+  const char *v = nbd_internal_fork_safe_itoa ((long) errno, buf, sizeof buf);
+  write (2, v, strlen (v));
+#endif
+  write (2, "\n", 1);
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+  /* Restore original errno in case it was disturbed by the system
+   * calls above.
+   */
+  errno = err;
+}
-- 
2.23.0
Richard W.M. Jones
2019-Sep-30  16:32 UTC
[Libguestfs] [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.
This adds new APIs for running a local NBD server and connecting to it
using systemd socket activation (instead of stdin/stdout).
This includes interop tests against nbdkit and qemu-nbd which I
believe are the only NBD servers supporting socket activation.  (If we
find others then we can add more interop tests in future.)
The upstream spec for systemd socket activation is here:
http://0pointer.de/blog/projects/socket-activation.html
---
 .gitignore                                   |   2 +
 TODO                                         |   2 -
 configure.ac                                 |   4 +
 generator/Makefile.am                        |   1 +
 generator/generator                          |  53 ++++-
 generator/states-connect-socket-activation.c | 228 +++++++++++++++++++
 interop/Makefile.am                          |  22 ++
 interop/socket-activation.c                  |  92 ++++++++
 lib/connect.c                                |  28 +++
 lib/handle.c                                 |  10 +
 lib/internal.h                               |   6 +
 11 files changed, 443 insertions(+), 5 deletions(-)
diff --git a/.gitignore b/.gitignore
index 1720e8f..9254d1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,8 @@ Makefile.in
 /interop/interop-qemu-nbd
 /interop/interop-qemu-nbd-tls-certs
 /interop/interop-qemu-nbd-tls-psk
+/interop/socket-activation-nbdkit
+/interop/socket-activation-qemu-nbd
 /interop/structured-read
 /lib/api.c
 /lib/libnbd.pc
diff --git a/TODO b/TODO
index 2f23a34..71d678b 100644
--- a/TODO
+++ b/TODO
@@ -17,8 +17,6 @@ NBD_INFO_BLOCK_SIZE.
 
 TLS should properly shut down the session (calling gnutls_bye).
 
-Implement nbd_connect + systemd socket activation.
-
 Improve function trace output so that:
  - Long strings are truncated.
  - Strings with non-printable characters are escaped.
diff --git a/configure.ac b/configure.ac
index 35a4fed..2ebd47c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -77,6 +77,10 @@ AC_CHECK_HEADERS([\
     stdatomic.h \
     sys/endian.h])
 
+dnl Check for functions, all optional.
+AC_CHECK_FUNCS([\
+    execvpe])
+
 dnl Check for sys_errlist (optional).
 AC_MSG_CHECKING([for sys_errlist])
 AC_TRY_LINK([], [extern int sys_errlist; char *p = &sys_errlist;], [
diff --git a/generator/Makefile.am b/generator/Makefile.am
index c20184a..dca17de 100644
--- a/generator/Makefile.am
+++ b/generator/Makefile.am
@@ -24,6 +24,7 @@ DISTCLEANFILES = stamp-generator
 states_code = \
 	states.c \
 	states-connect.c \
+	states-connect-socket-activation.c \
 	states-issue-command.c \
 	states-magic.c \
 	states-newstyle-opt-export-name.c \
diff --git a/generator/generator b/generator/generator
index 2fd2907..7a6b980 100755
--- a/generator/generator
+++ b/generator/generator
@@ -95,6 +95,7 @@ type external_event    | CmdConnectUnix              (*
[nbd_aio_connect_unix] *)
   | CmdConnectTCP               (* [nbd_aio_connect_tcp] *)
   | CmdConnectCommand           (* [nbd_aio_connect_command] *)
+  | CmdConnectSA                (* [nbd_aio_connect_socket_activation] *)
   | CmdIssue                    (* issuing an NBD command *)
 
 type location = string * int    (* source location: file, line number *)
@@ -168,13 +169,15 @@ let rec state_machine = [
                         CmdConnectSockAddr, "CONNECT.START";
                         CmdConnectUnix, "CONNECT_UNIX.START";
                         CmdConnectTCP, "CONNECT_TCP.START";
-                        CmdConnectCommand, "CONNECT_COMMAND.START" ];
+                        CmdConnectCommand, "CONNECT_COMMAND.START";
+                        CmdConnectSA, "CONNECT_SA.START" ];
   };
 
   Group ("CONNECT", connect_state_machine);
   Group ("CONNECT_UNIX", connect_unix_state_machine);
   Group ("CONNECT_TCP", connect_tcp_state_machine);
   Group ("CONNECT_COMMAND", connect_command_state_machine);
+  Group ("CONNECT_SA", connect_sa_state_machine);
 
   Group ("MAGIC", magic_state_machine);
   Group ("OLDSTYLE", oldstyle_state_machine);
@@ -272,6 +275,16 @@ and connect_command_state_machine = [
   };
 ]
 
+(* State machine implementing [nbd_aio_connect_socket_activation]. *)
+and connect_sa_state_machine = [
+  State {
+    default_state with
+    name = "START";
+    comment = "Connect to a subprocess with systemd socket
activation";
+    external_events = [];
+  };
+]
+
 (* Parse initial magic string from the server. *)
 and magic_state_machine = [
   State {
@@ -1501,6 +1514,18 @@ behave like inetd clients, such as C<nbdkit
--single>.";
     example = Some "examples/connect-command.c";
   };
 
+  "connect_socket_activation", {
+    default_call with
+    args = [ StringList "argv" ]; ret = RErr;
+    permitted_states = [ Created ];
+    shortdesc = "connect using systemd socket activation";
+    longdesc = "\
+Run the command as a subprocess and connect to it using
+systemd socket activation.  This is for use with NBD servers
+such as C<nbdkit> which understand socket activation.";
+    see_also = ["L<nbd_kill_subprocess(3)>"];
+  };
+
   "is_read_only", {
     default_call with
     args = []; ret = RBool;
@@ -2093,6 +2118,22 @@ Run the command as a subprocess and begin connecting to
it over
 stdin/stdout.  Parameters behave as documented in
 L<nbd_connect_command(3)>.
 
+You can check if the connection is still connecting by calling
+L<nbd_aio_is_connecting(3)>, or if it has connected to the server
+and completed the NBD handshake by calling L<nbd_aio_is_ready(3)>,
+on the connection.";
+  };
+
+  "aio_connect_socket_activation", {
+    default_call with
+    args = [ StringList "argv" ]; ret = RErr;
+    permitted_states = [ Created ];
+    shortdesc = "connect using systemd socket activation";
+    longdesc = "\
+Run the command as a subprocess and begin connecting to it using
+systemd socket activation.  Parameters behave as documented in
+L<nbd_connect_socket_activation(3)>.
+
 You can check if the connection is still connecting by calling
 L<nbd_aio_is_connecting(3)>, or if it has connected to the server
 and completed the NBD handshake by calling L<nbd_aio_is_ready(3)>,
@@ -2678,6 +2719,8 @@ let first_version = [
   "get_protocol", (1, 2);
   "set_handshake_flags", (1, 2);
   "get_handshake_flags", (1, 2);
+  "connect_socket_activation", (1, 2);
+  "aio_connect_socket_activation", (1, 2);
 
   (* These calls are proposed for a future version of libnbd, but
    * have not been added to any released version so far.
@@ -3036,7 +3079,8 @@ end = struct
 let all_external_events    [NotifyRead; NotifyWrite;
    CmdCreate;
-   CmdConnectSockAddr; CmdConnectUnix; CmdConnectTCP; CmdConnectCommand;
+   CmdConnectSockAddr; CmdConnectUnix; CmdConnectTCP;
+   CmdConnectCommand; CmdConnectSA;
    CmdIssue]
 
 let string_of_external_event = function
@@ -3047,6 +3091,7 @@ let string_of_external_event = function
   | CmdConnectUnix -> "CmdConnectUnix"
   | CmdConnectTCP -> "CmdConnectTCP"
   | CmdConnectCommand -> "CmdConnectCommand"
+  | CmdConnectSA -> "CmdConnectSA"
   | CmdIssue -> "CmdIssue"
 
 let c_string_of_external_event = function
@@ -3057,6 +3102,7 @@ let c_string_of_external_event = function
   | CmdConnectUnix -> "cmd_connect_unix"
   | CmdConnectTCP -> "cmd_connect_tcp"
   | CmdConnectCommand -> "cmd_connect_command"
+  | CmdConnectSA -> "cmd_connect_sa"
   | CmdIssue -> "cmd_issue"
 
 (* Find a state in the state machine hierarchy by path.  The [path]
@@ -3504,7 +3550,8 @@ let generate_lib_states_run_c ()            | NotifyWrite
-> pr "    r |= LIBNBD_AIO_DIRECTION_WRITE;\n"
           | CmdCreate
           | CmdConnectSockAddr
-          | CmdConnectUnix | CmdConnectTCP | CmdConnectCommand
+          | CmdConnectUnix | CmdConnectTCP
+          | CmdConnectCommand | CmdConnectSA
           | CmdIssue -> ()
       ) events;
       pr "    break;\n";
diff --git a/generator/states-connect-socket-activation.c
b/generator/states-connect-socket-activation.c
new file mode 100644
index 0000000..67e043a
--- /dev/null
+++ b/generator/states-connect-socket-activation.c
@@ -0,0 +1,228 @@
+/* nbd client library in userspace: state machine
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* State machine related to connecting with systemd socket activation. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "internal.h"
+
+/* This is baked into the systemd socket activation API. */
+#define FIRST_SOCKET_ACTIVATION_FD 3
+
+/* Prepare environment for calling execvpe when doing systemd socket
+ * activation.  Takes the current environment and copies it.  Removes
+ * any existing LISTEN_PID or LISTEN_FDS and replaces them with new
+ * variables.  env[0] is "LISTEN_PID=..." which is filled in by
+ * CONNECT_SA.START, and env[1] is "LISTEN_FDS=1".
+ */
+static char **
+prepare_socket_activation_environment (void)
+{
+  char **env = NULL;
+  char *p0 = NULL, *p1 = NULL;
+  size_t i, len;
+  void *vp;
+
+  p0 = strdup ("LISTEN_PID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+  if (p0 == NULL)
+    goto err;
+  p1 = strdup ("LISTEN_FDS=1");
+  if (p1 == NULL)
+    goto err;
+
+  /* Copy the current environment. */
+  env = nbd_internal_copy_string_list (environ);
+  if (env == NULL)
+    goto err;
+
+  /* Reserve slots env[0] and env[1]. */
+  len = nbd_internal_string_list_length (env);
+  vp = realloc (env,
+                sizeof (char *) * (len + 3 /* include final NULL entry */));
+  if (vp == NULL)
+    goto err;
+  env = vp;
+  memmove (&env[2], &env[0], sizeof (char *) * (len + 1));
+
+  env[0] = p0;          /* Code below assumes env[0] is LISTEN_PID. */
+  env[1] = p1;
+
+  /* Remove any existing LISTEN_PID or LISTEN_FDS instances. */
+  for (i = 2; env[i] != NULL; ++i) {
+    if (strncmp (env[i], "LISTEN_PID=", 11) == 0 ||
+        strncmp (env[i], "LISTEN_FDS=", 11) == 0) {
+      memmove (&env[i], &env[i+1],
+               sizeof (char *) * (nbd_internal_string_list_length
(&env[i])));
+      i--;
+    }
+  }
+
+  return env;
+
+ err:
+  set_error (errno, "malloc");
+  nbd_internal_free_string_list (env);
+  free (p0);
+  free (p1);
+  return NULL;
+}
+
+STATE_MACHINE {
+ CONNECT_SA.START:
+#ifdef HAVE_EXECVPE
+  size_t len;
+  int s;
+  struct sockaddr_un addr;
+  char **env;
+  pid_t pid;
+  int flags;
+
+  assert (!h->sock);
+  assert (h->argv);
+  assert (h->argv[0]);
+
+  /* Use /tmp instead of TMPDIR because we must ensure the path is
+   * short enough to store in the sockaddr_un.  On some platforms this
+   * may cause problems so we may need to revisit it.  XXX
+   */
+  h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX");
+  if (h->sa_tmpdir == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "strdup");
+    return 0;
+  }
+  if (mkdtemp (h->sa_tmpdir) == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "mkdtemp");
+    /* Avoid cleanup in nbd_close. */
+    free (h->sa_tmpdir);
+    h->sa_tmpdir = NULL;
+    return 0;
+  }
+
+  h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock");
+  if (h->sa_sockpath == NULL) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "strdup");
+    return 0;
+  }
+
+  len = strlen (h->sa_tmpdir);
+  memcpy (h->sa_sockpath, h->sa_tmpdir, len);
+
+  s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+  if (s == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "socket");
+    return 0;
+  }
+
+  addr.sun_family = AF_UNIX;
+  memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1);
+  if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "bind: %s", h->sa_sockpath);
+    close (s);
+    return 0;
+  }
+
+  if (listen (s, 1) == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "listen");
+    close (s);
+    return 0;
+  }
+
+  env = prepare_socket_activation_environment ();
+  if (!env) {
+    SET_NEXT_STATE (%.DEAD);
+    close (s);
+    return 0;
+  }
+
+  pid = fork ();
+  if (pid == -1) {
+    SET_NEXT_STATE (%.DEAD);
+    set_error (errno, "fork");
+    close (s);
+    nbd_internal_free_string_list (env);
+    return 0;
+  }
+  if (pid == 0) {         /* child - run command */
+    if (s != FIRST_SOCKET_ACTIVATION_FD) {
+      dup2 (s, FIRST_SOCKET_ACTIVATION_FD);
+      close (s);
+    }
+    else {
+      /* We must unset CLOEXEC on the fd.  (dup2 above does this
+       * implicitly because CLOEXEC is set on the fd, not on the
+       * socket).
+       */
+      flags = fcntl (s, F_GETFD, 0);
+      if (flags == -1) {
+        nbd_internal_fork_safe_perror ("fcntl: F_GETFD");
+        _exit (126);
+      }
+      if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
+        nbd_internal_fork_safe_perror ("fcntl: F_SETFD");
+        _exit (126);
+      }
+    }
+
+    char buf[32];
+    const char *v +      nbd_internal_fork_safe_itoa ((long) getpid (), buf,
sizeof buf);
+    strcpy (&env[0][11], v);
+
+    /* Restore SIGPIPE back to SIG_DFL. */
+    signal (SIGPIPE, SIG_DFL);
+
+    execvpe (h->argv[0], h->argv, env);
+    nbd_internal_fork_safe_perror (h->argv[0]);
+    if (errno == ENOENT)
+      _exit (127);
+    else
+      _exit (126);
+  }
+
+  /* Parent. */
+  close (s);
+  nbd_internal_free_string_list (env);
+  h->pid = pid;
+
+  h->connaddrlen = sizeof addr;
+  memcpy (&h->connaddr, &addr, h->connaddrlen);
+  SET_NEXT_STATE (%^CONNECT.START);
+  return 0;
+
+#else /* !HAVE_EXECVPE */
+  SET_NEXT_STATE (%.DEAD)
+  set_error (ENOTSUP, "platform does not support socket activation");
+  return 0;
+#endif
+
+} /* END STATE MACHINE */
diff --git a/interop/Makefile.am b/interop/Makefile.am
index 43350a8..d30cdf1 100644
--- a/interop/Makefile.am
+++ b/interop/Makefile.am
@@ -48,11 +48,13 @@ if HAVE_QEMU_NBD
 check_PROGRAMS += \
 	interop-qemu-nbd \
 	dirty-bitmap \
+	socket-activation-qemu-nbd \
 	structured-read \
 	$(NULL)
 TESTS += \
 	interop-qemu-nbd \
 	dirty-bitmap.sh \
+	socket-activation-qemu-nbd \
 	structured-read.sh \
 	$(NULL)
 
@@ -124,6 +126,15 @@ dirty_bitmap_CPPFLAGS = -I$(top_srcdir)/include
 dirty_bitmap_CFLAGS = $(WARNINGS_CFLAGS)
 dirty_bitmap_LDADD = $(top_builddir)/lib/libnbd.la
 
+socket_activation_qemu_nbd_SOURCES = socket-activation.c
+socket_activation_qemu_nbd_CPPFLAGS = \
+	-I$(top_srcdir)/include \
+	-DSERVER=\"$(QEMU_NBD)\" \
+	-DSERVER_PARAMS='"-f", "raw", "-x",
"", tmpfile' \
+	$(NULL)
+socket_activation_qemu_nbd_CFLAGS = $(WARNINGS_CFLAGS)
+socket_activation_qemu_nbd_LDADD = $(top_builddir)/lib/libnbd.la
+
 structured_read_SOURCES = structured-read.c
 structured_read_CPPFLAGS = -I$(top_srcdir)/include
 structured_read_CFLAGS = $(WARNINGS_CFLAGS)
@@ -135,9 +146,11 @@ if HAVE_NBDKIT
 
 check_PROGRAMS += \
 	interop-nbdkit \
+	socket-activation-nbdkit \
 	$(NULL)
 TESTS += \
 	interop-nbdkit \
+	socket-activation-nbdkit \
 	$(NULL)
 
 # See above comment about files in ../tests/Makefile.am
@@ -245,6 +258,15 @@ interop_nbdkit_tls_psk_allow_fallback_CPPFLAGS = \
 interop_nbdkit_tls_psk_allow_fallback_CFLAGS = $(WARNINGS_CFLAGS)
 interop_nbdkit_tls_psk_allow_fallback_LDADD = $(top_builddir)/lib/libnbd.la
 
+socket_activation_nbdkit_SOURCES = socket-activation.c
+socket_activation_nbdkit_CPPFLAGS = \
+	-I$(top_srcdir)/include \
+	-DSERVER=\"$(NBDKIT)\" \
+	-DSERVER_PARAMS='"file", tmpfile' \
+	$(NULL)
+socket_activation_nbdkit_CFLAGS = $(WARNINGS_CFLAGS)
+socket_activation_nbdkit_LDADD = $(top_builddir)/lib/libnbd.la
+
 endif HAVE_NBDKIT
 
 check-valgrind:
diff --git a/interop/socket-activation.c b/interop/socket-activation.c
new file mode 100644
index 0000000..db6f6ca
--- /dev/null
+++ b/interop/socket-activation.c
@@ -0,0 +1,92 @@
+/* NBD client library in userspace
+ * Copyright (C) 2019 Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Test interop with nbdkit or qemu-nbd, and systemd socket activation. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+
+#include <libnbd.h>
+
+#define SIZE 1048576
+
+int
+main (int argc, char *argv[])
+{
+  struct nbd_handle *nbd;
+  char tmpfile[] = "/tmp/nbdXXXXXX";
+  int fd;
+  int64_t actual_size;
+  char buf[512];
+  int r = -1;
+
+  /* Create a large sparse temporary file. */
+  fd = mkstemp (tmpfile);
+  if (fd == -1 ||
+      ftruncate (fd, SIZE) == -1 ||
+      close (fd) == -1) {
+    perror (tmpfile);
+    goto out;
+  }
+
+  nbd = nbd_create ();
+  if (nbd == NULL) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    goto out;
+  }
+
+  char *args[] = { SERVER, SERVER_PARAMS, NULL };
+  if (nbd_connect_socket_activation (nbd, args) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    goto out;
+  }
+
+  actual_size = nbd_get_size (nbd);
+  if (actual_size == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    goto out;
+  }
+  if (actual_size != SIZE) {
+    fprintf (stderr, "%s: actual size %" PRIi64 " <>
expected size %d",
+             argv[0], actual_size, SIZE);
+    goto out;
+  }
+
+  if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    goto out;
+  }
+
+  if (nbd_shutdown (nbd, 0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    goto out;
+  }
+
+  nbd_close (nbd);
+
+  r = 0;
+ out:
+  unlink (tmpfile);
+
+  exit (r == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+}
diff --git a/lib/connect.c b/lib/connect.c
index f98bcdb..c1cbef7 100644
--- a/lib/connect.c
+++ b/lib/connect.c
@@ -112,6 +112,16 @@ nbd_unlocked_connect_command (struct nbd_handle *h, char
**argv)
   return wait_until_connected (h);
 }
 
+/* Connect to a local command, use systemd socket activation. */
+int
+nbd_unlocked_connect_socket_activation (struct nbd_handle *h, char **argv)
+{
+  if (nbd_unlocked_aio_connect_socket_activation (h, argv) == -1)
+    return -1;
+
+  return wait_until_connected (h);
+}
+
 int
 nbd_unlocked_aio_connect (struct nbd_handle *h,
                           const struct sockaddr *addr, socklen_t len)
@@ -405,3 +415,21 @@ nbd_unlocked_aio_connect_command (struct nbd_handle *h,
char **argv)
 
   return nbd_internal_run (h, cmd_connect_command);
 }
+
+int
+nbd_unlocked_aio_connect_socket_activation (struct nbd_handle *h, char **argv)
+{
+  char **copy;
+
+  copy = nbd_internal_copy_string_list (argv);
+  if (!copy) {
+    set_error (errno, "copy_string_list");
+    return -1;
+  }
+
+  if (h->argv)
+    nbd_internal_free_string_list (h->argv);
+  h->argv = copy;
+
+  return nbd_internal_run (h, cmd_connect_sa);
+}
diff --git a/lib/handle.c b/lib/handle.c
index 2af25fe..a7f2c79 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h)
   free_cmd_list (h->cmds_in_flight);
   free_cmd_list (h->cmds_done);
   nbd_internal_free_string_list (h->argv);
+  if (h->sa_sockpath) {
+    if (h->pid > 0)
+      kill (h->pid, SIGTERM);
+    unlink (h->sa_sockpath);
+    free (h->sa_sockpath);
+  }
+  if (h->sa_tmpdir) {
+    rmdir (h->sa_tmpdir);
+    free (h->sa_tmpdir);
+  }
   free (h->unixsocket);
   free (h->hostname);
   free (h->port);
diff --git a/lib/internal.h b/lib/internal.h
index 31bc3d4..6433183 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -188,6 +188,12 @@ struct nbd_handle {
   char **argv;
   pid_t pid;
 
+  /* When using systemd socket activation, this directory and socket
+   * must be deleted, and the pid above must be killed.
+   */
+  char *sa_tmpdir;
+  char *sa_sockpath;
+
   /* When connecting to Unix domain socket. */
   char *unixsocket;
 
-- 
2.23.0
Eric Blake
2019-Sep-30  21:27 UTC
Re: [Libguestfs] [PATCH libnbd v2 1/2] lib: Don't use perror after fork in nbd_connect_callback.
On 9/30/19 11:32 AM, Richard W.M. Jones wrote:> perror is not fork-safe and so could deadlock. Instead open code a > fork-safe version of perror. While this fixes the current behaviour, > in the long term we'd like to capture the error message into the usual > error mechanism, so this is not the full and final fix for this issue. > > Also this fixes the exit code to be 126/127 instead of 1. > > Thanks: Eric Blake > --- > TODO | 1 + > configure.ac | 10 ++++++ > generator/states-connect.c | 7 ++-- > lib/internal.h | 2 ++ > lib/utils.c | 66 ++++++++++++++++++++++++++++++++++++++ > 5 files changed, 84 insertions(+), 2 deletions(-) >> +++ b/configure.ac > @@ -77,6 +77,16 @@ AC_CHECK_HEADERS([\ > stdatomic.h \ > sys/endian.h]) > > +dnl Check for sys_errlist (optional). > +AC_MSG_CHECKING([for sys_errlist]) > +AC_TRY_LINK([], [extern int sys_errlist; char *p = &sys_errlist;], [We probably ought to use AC_CACHE_CHECK for our various configure.ac things, to let a user have more control at overriding things if we probed wrong without having to patch configure.ac. But that's a separate problem, not affecting this one.> + > +/* Like sprintf (s, "%ld", v). The caller must supply a scratch > + * buffer which is large enough for the result (32 bytes is fine), but > + * note that the returned string does not point to the start of this > + * buffer. > + */ > +const char * > +nbd_internal_fork_safe_itoa (long v, char *buf, size_t bufsize) > +{ > + size_t i = bufsize - 1; > + bool neg = false; > + > + buf[i--] = '\0'; > + if (v < 0) { > + neg = true; > + v = -v; /* XXX fails for INT_MIN */Easy enough to work around. In the parameters: unsigned long v, and the condition here should be: if ((long) v < 0)> + } > + if (v == 0) > + buf[i--] = '0'; > + else { > + while (v) { > + buf[i--] = '0' + (v % 10); > + v /= 10; > + } > + } > + if (neg) > + buf[i--] = '-'; > + > + i++; > + return &buf[i];Do we want to assert that bufsize is large enough? (Or rather, abort() if it is not, since assert() is borderline in fork-safe context)> +} > + > +/* Fork-safe version of perror. ONLY use this after fork and before > + * exec, the rest of the time use set_error(). > + */ > +void > +nbd_internal_fork_safe_perror (const char *s) > +{ > + const int err = errno; > + > +#if defined(__GNUC__) > +#pragma GCC diagnostic push > +#pragma GCC diagnostic ignored "-Wunused-result" > +#endif > + write (2, s, strlen (s));Surprisingly, strlen() is not listed in current POSIX' list of async-signal-safe functions. But I have an open bug to remedy that, and don't see any problem in using it. I think it looks nicer to use STDERR_FILENO instead of 2.> + write (2, ": ", 2); > +#if HAVE_SYS_ERRLIST > + write (2, sys_errlist[errno], strlen (sys_errlist[errno])); > +#else > + char buf[32]; > + const char *v = nbd_internal_fork_safe_itoa ((long) errno, buf, sizeof buf); > + write (2, v, strlen (v)); > +#endif > + write (2, "\n", 1); > +#if defined(__GNUC__) > +#pragma GCC diagnostic pop > +#endif > + > + /* Restore original errno in case it was disturbed by the system > + * calls above. > + */ > + errno = err; > +} >Otherwise looks sane. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Eric Blake
2019-Oct-01  13:24 UTC
Re: [Libguestfs] [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.
On 9/30/19 11:32 AM, Richard W.M. Jones wrote:> This adds new APIs for running a local NBD server and connecting to it > using systemd socket activation (instead of stdin/stdout). > > This includes interop tests against nbdkit and qemu-nbd which I > believe are the only NBD servers supporting socket activation. (If we > find others then we can add more interop tests in future.) > > The upstream spec for systemd socket activation is here: > http://0pointer.de/blog/projects/socket-activation.html > ---> +++ b/generator/states-connect-socket-activation.c> +/* This is baked into the systemd socket activation API. */ > +#define FIRST_SOCKET_ACTIVATION_FD 3 > + > +/* Prepare environment for calling execvpe when doing systemd socket > + * activation. Takes the current environment and copies it. Removes > + * any existing LISTEN_PID or LISTEN_FDS and replaces them with new > + * variables. env[0] is "LISTEN_PID=..." which is filled in by > + * CONNECT_SA.START, and env[1] is "LISTEN_FDS=1". > + */I know that getenv()/setenv()/putenv() tend to prefer sorted environ, but I also think that exec HAS to handle a hand-built environ that is not sorted, so you should be okay with this shortcut.> +static char ** > +prepare_socket_activation_environment (void) > +{ > + char **env = NULL; > + char *p0 = NULL, *p1 = NULL; > + size_t i, len; > + void *vp; > + > + p0 = strdup ("LISTEN_PID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); > + if (p0 == NULL) > + goto err; > + p1 = strdup ("LISTEN_FDS=1"); > + if (p1 == NULL) > + goto err; > + > + /* Copy the current environment. */ > + env = nbd_internal_copy_string_list (environ);POSIX says the external symbol 'environ' has to exist for linking purposes, but also states that no standard header is required to declare it. You may want to add an 'extern char **environ;' line before this function for portability. On the other hand, gnulib documents that on newer Mac OS, even that doesn't work, where the solution is '#define environ (*_NSGetEnviron())'. I guess we'll deal with it when somebody actually reports compilation failure.> + > + /* Remove any existing LISTEN_PID or LISTEN_FDS instances. */ > + for (i = 2; env[i] != NULL; ++i) { > + if (strncmp (env[i], "LISTEN_PID=", 11) == 0 || > + strncmp (env[i], "LISTEN_FDS=", 11) == 0) { > + memmove (&env[i], &env[i+1], > + sizeof (char *) * (nbd_internal_string_list_length (&env[i]))); > + i--; > + } > + }Lots of O(N) traversals of the list, but this probably isn't our hot spot, and so probably not worth optimizing further.> +STATE_MACHINE { > + CONNECT_SA.START: > +#ifdef HAVE_EXECVPE > + size_t len; > + int s; > + struct sockaddr_un addr; > + char **env; > + pid_t pid; > + int flags; > + > + assert (!h->sock); > + assert (h->argv); > + assert (h->argv[0]); > + > + /* Use /tmp instead of TMPDIR because we must ensure the path is > + * short enough to store in the sockaddr_un. On some platforms this > + * may cause problems so we may need to revisit it. XXX > + */ > + h->sa_tmpdir = strdup ("/tmp/libnbdXXXXXX"); > + if (h->sa_tmpdir == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "strdup"); > + return 0; > + } > + if (mkdtemp (h->sa_tmpdir) == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "mkdtemp"); > + /* Avoid cleanup in nbd_close. */ > + free (h->sa_tmpdir); > + h->sa_tmpdir = NULL; > + return 0; > + } > + > + h->sa_sockpath = strdup ("/tmp/libnbdXXXXXX/sock"); > + if (h->sa_sockpath == NULL) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "strdup"); > + return 0; > + } > + > + len = strlen (h->sa_tmpdir); > + memcpy (h->sa_sockpath, h->sa_tmpdir, len);Is it worth using: asprintf (&h->sa_sockpath, "%s/sock", h->sa_tmpdir); for less code? asprintf might not be standard, but we already require execvpe, which probably means asprintf is available. But your open-coded variant works, too.> + > + s = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); > + if (s == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "socket"); > + return 0; > + }I guess the child process can add O_NONBLOCK if they want it.> + > + addr.sun_family = AF_UNIX; > + memcpy (addr.sun_path, h->sa_sockpath, strlen (h->sa_sockpath) + 1); > + if (bind (s, (struct sockaddr *) &addr, sizeof addr) == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "bind: %s", h->sa_sockpath); > + close (s); > + return 0; > + } > + > + if (listen (s, 1) == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "listen"); > + close (s); > + return 0; > + } > + > + env = prepare_socket_activation_environment (); > + if (!env) { > + SET_NEXT_STATE (%.DEAD); > + close (s); > + return 0; > + } > + > + pid = fork (); > + if (pid == -1) { > + SET_NEXT_STATE (%.DEAD); > + set_error (errno, "fork"); > + close (s); > + nbd_internal_free_string_list (env); > + return 0; > + } > + if (pid == 0) { /* child - run command */ > + if (s != FIRST_SOCKET_ACTIVATION_FD) { > + dup2 (s, FIRST_SOCKET_ACTIVATION_FD); > + close (s); > + } > + else { > + /* We must unset CLOEXEC on the fd. (dup2 above does this > + * implicitly because CLOEXEC is set on the fd, not on the > + * socket). > + */ > + flags = fcntl (s, F_GETFD, 0); > + if (flags == -1) { > + nbd_internal_fork_safe_perror ("fcntl: F_GETFD"); > + _exit (126); > + } > + if (fcntl (s, F_SETFD, flags & ~FD_CLOEXEC) == -1) { > + nbd_internal_fork_safe_perror ("fcntl: F_SETFD"); > + _exit (126); > + } > + } > +Looks correct.> + char buf[32]; > + const char *v > + nbd_internal_fork_safe_itoa ((long) getpid (), buf, sizeof buf); > + strcpy (&env[0][11], v);We're using the magic '11' in several places, maybe it's worth a #define to make it obvious it is strlen("LISTEN_PID=") ?> + > + /* Restore SIGPIPE back to SIG_DFL. */ > + signal (SIGPIPE, SIG_DFL); > + > + execvpe (h->argv[0], h->argv, env); > + nbd_internal_fork_safe_perror (h->argv[0]); > + if (errno == ENOENT) > + _exit (127); > + else > + _exit (126); > + } > + > + /* Parent. */ > + close (s); > + nbd_internal_free_string_list (env); > + h->pid = pid; > + > + h->connaddrlen = sizeof addr; > + memcpy (&h->connaddr, &addr, h->connaddrlen); > + SET_NEXT_STATE (%^CONNECT.START); > + return 0; > + > +#else /* !HAVE_EXECVPE */ > + SET_NEXT_STATE (%.DEAD) > + set_error (ENOTSUP, "platform does not support socket activation"); > + return 0; > +#endifWe probably ought to add a matching nbd_supports_socket_activation() feature function. Or, it would be possible to create a fallback for execvpe() on platforms that lack it by using execlpe() and our own path-walker utility function. Can be done as a followup patch. If we do that, then the mere presence of LIBNBD_HAVE_NBD_CONNECT_SA is witness enough of the functionality, rather than needing a runtime probe.> +++ b/lib/connect.c> + > +int > +nbd_unlocked_aio_connect_socket_activation (struct nbd_handle *h, char **argv) > +{ > + char **copy; > + > + copy = nbd_internal_copy_string_list (argv); > + if (!copy) { > + set_error (errno, "copy_string_list"); > + return -1; > + } > + > + if (h->argv) > + nbd_internal_free_string_list (h->argv);How can h->argv ever be previously set?> + h->argv = copy; > + > + return nbd_internal_run (h, cmd_connect_sa); > +} > diff --git a/lib/handle.c b/lib/handle.c > index 2af25fe..a7f2c79 100644 > --- a/lib/handle.c > +++ b/lib/handle.c > @@ -129,6 +129,16 @@ nbd_close (struct nbd_handle *h) > free_cmd_list (h->cmds_in_flight); > free_cmd_list (h->cmds_done); > nbd_internal_free_string_list (h->argv); > + if (h->sa_sockpath) { > + if (h->pid > 0) > + kill (h->pid, SIGTERM); > + unlink (h->sa_sockpath); > + free (h->sa_sockpath); > + } > + if (h->sa_tmpdir) { > + rmdir (h->sa_tmpdir); > + free (h->sa_tmpdir); > + } > free (h->unixsocket); > free (h->hostname); > free (h->port);Somewhat pre-existing: we have a waitpid() here (good, so we don't hang on to a zombie process), but we are relying on the child process to gracefully go away (whether for connect_command when stdin closes, or for connect_sa on receipt of SIGTERM). Do we need a retry loop that escalates to SIGKILL if the child process does not quickly respond to the initial condition? On the other hand, the fact that our waitpid() blocks until the child changes status means that if a child ever wedges, the fact that we wedge too gives some visibility to the client that it's not libnbd's fault and that they need to get the bug fixed in their child process. I think it is ready to push. We may still need further tweaks, but that's often the case. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Reasonably Related Threads
- [PATCH libnbd 1/2] lib: Avoid killing subprocess twice.
- Re: [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.
- [PATCH libnbd 0/2] api: Add support for AF_VSOCK.
- [PATCH libnbd v2 2/2] api: Implement local command with systemd socket activation.
- [PATCH libnbd 1/4] generator: Allow long ‘name - shortdesc’ in man pages.