Richard W.M. Jones
2019-Sep-12 10:02 UTC
[Libguestfs] [PATCH libnbd 1/2] nbd_connect_tcp: Try to return errno from underlying connect(2) call.
When we make a TCP connection we have to make multiple underlying
connect(2) calls, once for each address returned by getaddrinfo.
Unfortunately this meant that we lost the errno from any of these
calls:
$ nbdsh -c 'h.connect_tcp ("localhost", "nbd")'
nbd.Error: nbd_connect_tcp: connect: localhost:nbd: could not connect to remote
host
This commit saves the errno from the first failed connect(2):
$ ./run nbdsh -c 'h.connect_tcp ("localhost",
"nbd")'
nbd.Error: nbd_connect_tcp: connect: localhost:nbd: could not connect to remote
host: Connection refused (ECONNREFUSED)
---
generator/states-connect.c | 12 ++++++++++--
lib/internal.h | 1 +
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/generator/states-connect.c b/generator/states-connect.c
index 9e2e1d4..e9b3582 100644
--- a/generator/states-connect.c
+++ b/generator/states-connect.c
@@ -128,6 +128,8 @@ disable_nagle (int sock)
h->result = NULL;
}
+ h->connect_errno = 0;
+
memset (&h->hints, 0, sizeof h->hints);
h->hints.ai_family = AF_UNSPEC;
h->hints.ai_socktype = SOCK_STREAM;
@@ -160,7 +162,8 @@ disable_nagle (int sock)
* Save errno from most recent connect(2) call. XXX
*/
SET_NEXT_STATE (%^START);
- set_error (0, "connect: %s:%s: could not connect to remote host",
+ set_error (h->connect_errno,
+ "connect: %s:%s: could not connect to remote host",
h->hostname, h->port);
return -1;
}
@@ -182,6 +185,8 @@ disable_nagle (int sock)
if (connect (fd, h->rp->ai_addr, h->rp->ai_addrlen) == -1) {
if (errno != EINPROGRESS) {
+ if (h->connect_errno == 0)
+ h->connect_errno = errno;
SET_NEXT_STATE (%NEXT_ADDRESS);
return 0;
}
@@ -203,8 +208,11 @@ disable_nagle (int sock)
/* This checks the status of the original connect call. */
if (status == 0)
SET_NEXT_STATE (%^MAGIC.START);
- else
+ else {
+ if (h->connect_errno == 0)
+ h->connect_errno = status;
SET_NEXT_STATE (%NEXT_ADDRESS);
+ }
return 0;
CONNECT_TCP.NEXT_ADDRESS:
diff --git a/lib/internal.h b/lib/internal.h
index a48edff..ccaca32 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -188,6 +188,7 @@ struct nbd_handle {
char *hostname, *port;
struct addrinfo hints;
struct addrinfo *result, *rp;
+ int connect_errno;
/* When sending metadata contexts, this is used. */
size_t querynum;
--
2.23.0
Richard W.M. Jones
2019-Sep-12 10:02 UTC
[Libguestfs] [PATCH libnbd 2/2] interop: Retry TCP connections to qemu-nbd.
The test interop-qemu-nbd-tls-certs frequently fails on slow (32 bit)
machines in Fedora Koji. (Is crypto slow on these already overloaded
machines?)
As we cannot wait for a signal when qemu-nbd is ready start serving,
we have to use a sleep. The current sleep is 5 seconds, which is not
long enough. Making the sleep longer would work but is inconsiderate
for people using faster machines. Therefore replace this with a retry
loop with exponential backoff.
I tested this with a simple wrapper around qemu-nbd which did:
sleep 5; exec /usr/bin/qemu-nbd "$@"
---
interop/interop.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/interop/interop.c b/interop/interop.c
index 662d871..a3ab39b 100644
--- a/interop/interop.c
+++ b/interop/interop.c
@@ -28,6 +28,7 @@
#include <fcntl.h>
#include <time.h>
#include <signal.h>
+#include <errno.h>
#include <sys/types.h>
#include <libnbd.h>
@@ -44,6 +45,7 @@ main (int argc, char *argv[])
int port;
char port_str[16];
pid_t pid = -1;
+ int retry;
#endif
int64_t actual_size;
char buf[512];
@@ -114,14 +116,19 @@ main (int argc, char *argv[])
}
/* Unfortunately there's no good way to wait for qemu-nbd to start
- * serving, so ...
+ * serving, so we need to retry here.
*/
- sleep (5);
-
- if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) {
- fprintf (stderr, "%s\n", nbd_get_error ());
- goto out;
+ for (retry = 0; retry < 5; ++retry) {
+ sleep (1 << retry);
+ if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) {
+ fprintf (stderr, "%s\n", nbd_get_error ());
+ if (nbd_get_errno () != ECONNREFUSED)
+ goto out;
+ }
+ else break;
}
+ if (retry == 5)
+ goto out;
#else /* !SERVE_OVER_TCP */
--
2.23.0
Eric Blake
2019-Sep-12 11:44 UTC
Re: [Libguestfs] [PATCH libnbd 2/2] interop: Retry TCP connections to qemu-nbd.
On 9/12/19 5:02 AM, Richard W.M. Jones wrote:> The test interop-qemu-nbd-tls-certs frequently fails on slow (32 bit) > machines in Fedora Koji. (Is crypto slow on these already overloaded > machines?) > > As we cannot wait for a signal when qemu-nbd is ready start serving, > we have to use a sleep. The current sleep is 5 seconds, which is not > long enough. Making the sleep longer would work but is inconsiderate > for people using faster machines. Therefore replace this with a retry > loop with exponential backoff. > > I tested this with a simple wrapper around qemu-nbd which did: > > sleep 5; exec /usr/bin/qemu-nbd "$@" > --- > interop/interop.c | 19 +++++++++++++------ > 1 file changed, 13 insertions(+), 6 deletions(-) >> /* Unfortunately there's no good way to wait for qemu-nbd to start > - * serving, so ... > + * serving, so we need to retry here. > */qemu 4.1 added --pid-file, where you can test for that file existing to know when the server is up and running. But we still want an adaptive exponential backoff whether we test for a file or just try connections, so this patch is fine.> - sleep (5); > - > - if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) { > - fprintf (stderr, "%s\n", nbd_get_error ()); > - goto out; > + for (retry = 0; retry < 5; ++retry) { > + sleep (1 << retry); > + if (nbd_connect_tcp (nbd, "localhost", port_str) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + if (nbd_get_errno () != ECONNREFUSED) > + goto out; > + } > + else break; > } > + if (retry == 5) > + goto out; > > #else /* !SERVE_OVER_TCP */ > >ACK series -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Reasonably Related Threads
- [PATCH libnbd 0/2] Change qemu-nbd interop tests to use socket activation.
- [PATCH libnbd 2/2] api: Add support for AF_VSOCK.
- [PATCH libnbd 2/2] api: Implement local command with systemd socket activation.
- [PATCH libnbd 0/2] api: Add support for AF_VSOCK.
- [PATCH libnbd 2/2] interop: Retry TCP connections to qemu-nbd.