Richard W.M. Jones
2022-Jul-27 16:30 UTC
[Libguestfs] [PATCH libnbd 1/2] lib/crypto: Use GNUTLS_NO_SIGNAL if available
libnbd has long used MSG_NOSIGNAL to avoid receiving SIGPIPE if we accidentally write on a closed socket, which is a nice alternative to using a SIGPIPE signal handler. However with TLS connections, gnutls did not use this flag and so programs using libnbd + TLS would receive SIGPIPE in some situations, notably if the server closed the connection abruptly while we were trying to write something. GnuTLS 3.4.2 introduces GNUTLS_NO_SIGNAL which does the same thing. Use this flag if available. RHEL 7 has an older gnutls which lacks this flag. To avoid qemu-nbd interop tests failing (rarely, but more often with a forthcoming change to TLS shutdown behaviour), register a SIGPIPE signal handler in the test if the flag is missing. --- configure.ac | 15 +++++++++++++++ interop/interop.c | 10 ++++++++++ lib/crypto.c | 7 ++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 86c3a08690..b5bae4f1b2 100644 --- a/configure.ac +++ b/configure.ac @@ -182,6 +182,21 @@ AS_IF([test "$GNUTLS_LIBS" != ""],[ gnutls_session_set_verify_cert \ gnutls_transport_is_ktls_enabled \ ]) + AC_MSG_CHECKING([if gnutls has GNUTLS_NO_SIGNAL]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([ + #include <gnutls/gnutls.h> + gnutls_session_t session; + ], [ + gnutls_init(&session, GNUTLS_CLIENT|GNUTLS_NO_SIGNAL); + ]) + ], [ + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_GNUTLS_NO_SIGNAL], [1], + [GNUTLS_NO_SIGNAL found at compile time]) + ], [ + AC_MSG_RESULT([no]) + ]) LIBS="$old_LIBS" ]) diff --git a/interop/interop.c b/interop/interop.c index b41f3ca887..036545bd82 100644 --- a/interop/interop.c +++ b/interop/interop.c @@ -84,6 +84,16 @@ main (int argc, char *argv[]) REQUIRES #endif + /* Ignore SIGPIPE. We only need this for GnuTLS < 3.4.2, since + * newer GnuTLS has the GNUTLS_NO_SIGNAL flag which adds + * MSG_NOSIGNAL to each write call. + */ +#if !HAVE_GNUTLS_NO_SIGNAL +#if TLS + signal (SIGPIPE, SIG_IGN); +#endif +#endif + /* Create a large sparse temporary file. */ #ifdef NEEDS_TMPFILE int fd = mkstemp (TMPFILE); diff --git a/lib/crypto.c b/lib/crypto.c index ffc2b4ab5f..ffba4bda9b 100644 --- a/lib/crypto.c +++ b/lib/crypto.c @@ -590,7 +590,12 @@ nbd_internal_crypto_create_session (struct nbd_handle *h, gnutls_psk_client_credentials_t pskcreds = NULL; gnutls_certificate_credentials_t xcreds = NULL; - err = gnutls_init (&session, GNUTLS_CLIENT|GNUTLS_NONBLOCK); + err = gnutls_init (&session, + GNUTLS_CLIENT | GNUTLS_NONBLOCK +#if HAVE_GNUTLS_NO_SIGNAL + | GNUTLS_NO_SIGNAL +#endif + ); if (err < 0) { set_error (errno, "gnutls_init: %s", gnutls_strerror (err)); return NULL; -- 2.37.0.rc2
Richard W.M. Jones
2022-Jul-27 16:30 UTC
[Libguestfs] [PATCH libnbd 2/2] lib/crypto.c: Ignore TLS premature termination after write shutdown
qemu-nbd doesn't call gnutls_bye to cleanly shut down the connection after we send NBD_CMD_DISC. When copying from a qemu-nbd server (or any operation which calls nbd_shutdown) you will see errors like this: $ nbdcopy nbds://foo?tls-certificates=/var/tmp/pki null: nbds://foo?tls-certificates=/var/tmp/pki: nbd_shutdown: gnutls_record_recv: The TLS connection was non-properly terminated. Relatedly you may also see: nbd_shutdown: gnutls_record_recv: Error in the pull function. This commit suppresses the error in the case where we know that we have shut down writes (which happens after NBD_CMD_DISC has been sent on the wire). --- interop/interop.c | 9 --------- lib/crypto.c | 17 +++++++++++++++++ lib/internal.h | 1 + 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/interop/interop.c b/interop/interop.c index 036545bd82..cce9407aa1 100644 --- a/interop/interop.c +++ b/interop/interop.c @@ -226,19 +226,10 @@ main (int argc, char *argv[]) /* XXX In future test more operations here. */ -#if !TLS - /* XXX qemu doesn't shut down the connection nicely (using - * gnutls_bye) and because of this the following call will fail - * with: - * - * nbd_shutdown: gnutls_record_recv: The TLS connection was - * non-properly terminated. - */ if (nbd_shutdown (nbd, 0) == -1) { fprintf (stderr, "%s\n", nbd_get_error ()); exit (EXIT_FAILURE); } -#endif nbd_close (nbd); diff --git a/lib/crypto.c b/lib/crypto.c index ffba4bda9b..a2832b1585 100644 --- a/lib/crypto.c +++ b/lib/crypto.c @@ -189,6 +189,22 @@ tls_recv (struct nbd_handle *h, struct socket *sock, void *buf, size_t len) errno = EAGAIN; return -1; } + if (h->tls_shut_writes && + (r == GNUTLS_E_PULL_ERROR || r == GNUTLS_E_PREMATURE_TERMINATION)) { + /* qemu-nbd doesn't call gnutls_bye to cleanly shut down the + * connection after we send NBD_CMD_DISC, instead it simply + * closes the connection. On the client side we see + * "gnutls_record_recv: The TLS connection was non-properly + * terminated" or "gnutls_record_recv: Error in the pull + * function.". + * + * If we see these errors after we shut down the write side + * (h->tls_shut_writes), which happens after we have sent + * NBD_CMD_DISC on the wire, downgrade them to a debug message. + */ + debug (h, "gnutls_record_recv: %s", gnutls_strerror (r)); + return 0; /* EOF */ + } set_error (0, "gnutls_record_recv: %s", gnutls_strerror (r)); errno = EIO; return -1; @@ -236,6 +252,7 @@ tls_shut_writes (struct nbd_handle *h, struct socket *sock) return false; if (r != 0) debug (h, "ignoring gnutls_bye failure: %s", gnutls_strerror (r)); + h->tls_shut_writes = true; return sock->u.tls.oldsock->ops->shut_writes (h, sock->u.tls.oldsock); } diff --git a/lib/internal.h b/lib/internal.h index 4121a5cdaa..8dbe2e4568 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -307,6 +307,7 @@ struct nbd_handle { struct command *reply_cmd; bool disconnect_request; /* True if we've queued NBD_CMD_DISC */ + bool tls_shut_writes; /* Used by lib/crypto.c to track disconnect. */ }; struct meta_context { -- 2.37.0.rc2
Eric Blake
2022-Jul-28 12:22 UTC
[Libguestfs] [PATCH libnbd 1/2] lib/crypto: Use GNUTLS_NO_SIGNAL if available
On Wed, Jul 27, 2022 at 05:30:58PM +0100, Richard W.M. Jones wrote:> libnbd has long used MSG_NOSIGNAL to avoid receiving SIGPIPE if we > accidentally write on a closed socket, which is a nice alternative to > using a SIGPIPE signal handler. However with TLS connections, gnutlsEspecially since we are embedded in a larger program, where we may not always have say on whether the rest of the program wants a SIGPIPE handler installed.> did not use this flag and so programs using libnbd + TLS would receive > SIGPIPE in some situations, notably if the server closed the > connection abruptly while we were trying to write something. > > GnuTLS 3.4.2 introduces GNUTLS_NO_SIGNAL which does the same thing. > Use this flag if available. > > RHEL 7 has an older gnutls which lacks this flag. To avoid qemu-nbd > interop tests failing (rarely, but more often with a forthcoming > change to TLS shutdown behaviour), register a SIGPIPE signal handler > in the test if the flag is missing. > --- > configure.ac | 15 +++++++++++++++ > interop/interop.c | 10 ++++++++++ > lib/crypto.c | 7 ++++++- > 3 files changed, 31 insertions(+), 1 deletion(-) > > diff --git a/configure.ac b/configure.ac > index 86c3a08690..b5bae4f1b2 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -182,6 +182,21 @@ AS_IF([test "$GNUTLS_LIBS" != ""],[ > gnutls_session_set_verify_cert \ > gnutls_transport_is_ktls_enabled \ > ]) > + AC_MSG_CHECKING([if gnutls has GNUTLS_NO_SIGNAL])Do we really need this? gnutls is one of the nicer programs which does both an enum and #define wrappers around the enum: /usr/include/gnutls/gnutls.h:505: GNUTLS_NO_SIGNAL = (1<<6), /usr/include/gnutls/gnutls.h:533:#define GNUTLS_NO_SIGNAL (1<<6) which means that we can merely use #ifdef GNUTLS_NO_SIGNAL rather than having to define HAVE_GNUTLS_NO_SIGNAL as a wrapper ourselves, provided that we only care about the feature during .c files. (If we cared during Makefile.am, to skip compilation of an entire test, then it would make more sense to add this snippet to configure.ac)> +++ b/interop/interop.c > @@ -84,6 +84,16 @@ main (int argc, char *argv[]) > REQUIRES > #endif > > + /* Ignore SIGPIPE. We only need this for GnuTLS < 3.4.2, since > + * newer GnuTLS has the GNUTLS_NO_SIGNAL flag which adds > + * MSG_NOSIGNAL to each write call.It sounds like MSG_NOSIGNAL does not exist on all platforms, so even with GnuTLS >= 3.4.2 we may still be in the scenario where we install this signal handler. The comment could be made slightly more accurate, along the lines of: We only need this for GnuTLS that lacks the GNUTLS_NO_SIGNAL flag (either because it predates GnuTLS 3.4.2 or because the OS lacks MSG_NOSIGNAL support).> + */ > +#if !HAVE_GNUTLS_NO_SIGNAL > +#if TLSCould simplify to: #if TLS && !defined(GNUTLS_NO_SIGNAL)> + signal (SIGPIPE, SIG_IGN); > +#endif > +#endif > + > /* Create a large sparse temporary file. */ > #ifdef NEEDS_TMPFILE > int fd = mkstemp (TMPFILE); > diff --git a/lib/crypto.c b/lib/crypto.c > index ffc2b4ab5f..ffba4bda9b 100644 > --- a/lib/crypto.c > +++ b/lib/crypto.c > @@ -590,7 +590,12 @@ nbd_internal_crypto_create_session (struct nbd_handle *h, > gnutls_psk_client_credentials_t pskcreds = NULL; > gnutls_certificate_credentials_t xcreds = NULL; > > - err = gnutls_init (&session, GNUTLS_CLIENT|GNUTLS_NONBLOCK); > + err = gnutls_init (&session, > + GNUTLS_CLIENT | GNUTLS_NONBLOCK > +#if HAVE_GNUTLS_NO_SIGNAL > + | GNUTLS_NO_SIGNAL > +#endif > + );I'm not a fan of mid-expression #if. If the expression is itself a macro call, the preprocessor may have issues. I would have written: unsigned int flags = GNUTLS_CLIENT | GNUTLS_NONBLOCK; #ifdef GNUTLS_NO_SIGNAL flags |= GNUTLS_NO_SIGNAL; #endif err = gnutls_init (&session, flags); However, despite my comments, I agree with the principle of the patch - we cannot assume the larger application will install a SIGPIPE handler (except in the limited cases like our testsuite or nbdcopy where we ARE the larger application), so telling GnuTLS to avoid it on our behalf when possible is wise. We may also want to add documentation to warn users (such as on BSD) that larger applications that do not install SIGPIPE handlers, but which lack MSG_NOSIGNAL, are at risk of subtle failures when the server disconnects abruptly. I also found this link, which suggests we may also want to try setsockopt (SO_NOSIGPIPE) where that is available (MacOS), and that we are out of luck on Solaris: https://github.com/lpeterse/haskell-socket/issues/8 -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Laszlo Ersek
2022-Jul-28 12:57 UTC
[Libguestfs] [PATCH libnbd 1/2] lib/crypto: Use GNUTLS_NO_SIGNAL if available
On 07/27/22 18:30, Richard W.M. Jones wrote:> libnbd has long used MSG_NOSIGNAL to avoid receiving SIGPIPE if we > accidentally write on a closed socket, which is a nice alternative to > using a SIGPIPE signal handler. However with TLS connections, gnutls > did not use this flag and so programs using libnbd + TLS would receive > SIGPIPE in some situations, notably if the server closed the > connection abruptly while we were trying to write something. > > GnuTLS 3.4.2 introduces GNUTLS_NO_SIGNAL which does the same thing. > Use this flag if available. > > RHEL 7 has an older gnutls which lacks this flag. To avoid qemu-nbd > interop tests failing (rarely, but more often with a forthcoming > change to TLS shutdown behaviour), register a SIGPIPE signal handler > in the test if the flag is missing. > --- > configure.ac | 15 +++++++++++++++ > interop/interop.c | 10 ++++++++++ > lib/crypto.c | 7 ++++++- > 3 files changed, 31 insertions(+), 1 deletion(-) > > diff --git a/configure.ac b/configure.ac > index 86c3a08690..b5bae4f1b2 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -182,6 +182,21 @@ AS_IF([test "$GNUTLS_LIBS" != ""],[ > gnutls_session_set_verify_cert \ > gnutls_transport_is_ktls_enabled \ > ]) > + AC_MSG_CHECKING([if gnutls has GNUTLS_NO_SIGNAL]) > + AC_COMPILE_IFELSE( > + [AC_LANG_PROGRAM([ > + #include <gnutls/gnutls.h> > + gnutls_session_t session; > + ], [ > + gnutls_init(&session, GNUTLS_CLIENT|GNUTLS_NO_SIGNAL); > + ]) > + ], [ > + AC_MSG_RESULT([yes]) > + AC_DEFINE([HAVE_GNUTLS_NO_SIGNAL], [1], > + [GNUTLS_NO_SIGNAL found at compile time]) > + ], [ > + AC_MSG_RESULT([no]) > + ]) > LIBS="$old_LIBS" > ]) > > diff --git a/interop/interop.c b/interop/interop.c > index b41f3ca887..036545bd82 100644 > --- a/interop/interop.c > +++ b/interop/interop.c > @@ -84,6 +84,16 @@ main (int argc, char *argv[]) > REQUIRES > #endif > > + /* Ignore SIGPIPE. We only need this for GnuTLS < 3.4.2, since > + * newer GnuTLS has the GNUTLS_NO_SIGNAL flag which adds > + * MSG_NOSIGNAL to each write call. > + */ > +#if !HAVE_GNUTLS_NO_SIGNAL > +#if TLS > + signal (SIGPIPE, SIG_IGN); > +#endif > +#endif > + > /* Create a large sparse temporary file. */ > #ifdef NEEDS_TMPFILE > int fd = mkstemp (TMPFILE); > diff --git a/lib/crypto.c b/lib/crypto.c > index ffc2b4ab5f..ffba4bda9b 100644 > --- a/lib/crypto.c > +++ b/lib/crypto.c > @@ -590,7 +590,12 @@ nbd_internal_crypto_create_session (struct nbd_handle *h, > gnutls_psk_client_credentials_t pskcreds = NULL; > gnutls_certificate_credentials_t xcreds = NULL; > > - err = gnutls_init (&session, GNUTLS_CLIENT|GNUTLS_NONBLOCK); > + err = gnutls_init (&session, > + GNUTLS_CLIENT | GNUTLS_NONBLOCK > +#if HAVE_GNUTLS_NO_SIGNAL > + | GNUTLS_NO_SIGNAL > +#endif > + ); > if (err < 0) { > set_error (errno, "gnutls_init: %s", gnutls_strerror (err)); > return NULL; >I've never seen a single example of SIGPIPE being useful to a program that checks for, and properly handles, write() / send() / sendto() / sendmsg() failures. Even if we want to die with a SIGPIPE actually (so that the parent see it), we can recognize errno == EPIPE, then reset the disposition of SIGPIPE to SIG_DFLT in a controlled manner, and re-raise the signal. (But this is totally obscure; in most cases, a parent process is satisfied if the child exits with status 1 or so in case the child sees an EPIPE.) So, why not set the disposition of SIGPIPE to SIG_IGN indiscriminately? Thanks Laszlo