Eric Blake
2022-Aug-12 02:06 UTC
[Libguestfs] [libnbd PATCH 0/4] Add some resilience to unexpected structured replies
While working on 64-bit extensions for NBD, I found myself getting annoyed that the slightest bug kills the entire libnbd connection, rather than trying to resynchronize with the server, even though the intent of structured replies is to make resync easier. Eric Blake (4): generator: Use static buffer for discaring reads generator: Refactor handling of structured replies states: Add new RESYNC state for bad structured replies states: Use RESYNC to handle more structured reply server bugs generator/state_machine.ml | 9 +- generator/states-reply-structured.c | 228 ++++++++++++++-------------- generator/states.c | 37 ++--- 3 files changed, 141 insertions(+), 133 deletions(-) -- 2.37.1
Eric Blake
2022-Aug-12 02:06 UTC
[Libguestfs] [libnbd PATCH 1/4] generator: Use static buffer for discaring reads
No need to malloc()/free() a throw-away buffer, when we can just use a static one. We normally don't even have to worry about thread-safety issues between separate connections (as long as we never read the buffer, we don't care if separate nbd handles clobber the same buffer at the same time); but for DUMP_PACKETS, we have to use stack allocations instead of static allocation (this makes stack utilization much larger, but this is okay for an expensive debugging mode that isn't even going to be compiled in normal practice). --- generator/states.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/generator/states.c b/generator/states.c index 654281c..d2aa51d 100644 --- a/generator/states.c +++ b/generator/states.c @@ -1,5 +1,5 @@ /* nbd client library in userspace: state machine - * Copyright (C) 2013-2020 Red Hat Inc. + * Copyright (C) 2013-2022 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -44,33 +44,39 @@ static int recv_into_rbuf (struct nbd_handle *h) { ssize_t r; - char *buf = NULL; void *rbuf; size_t rlen; - if (h->rlen == 0) - return 0; /* move to next state */ - /* As a special case h->rbuf is allowed to be NULL, meaning * throw away the data. + * + * When building with DUMP_PACKETS, it's worth debugging even + * discarded packets; this makes our stack frame larger, but + * DUMP_PACKETS is already for developers. Otherwise, we share a + * single static sink buffer across all nbd handles; we don't care + * about thread-safety issues with two clients discarding data at + * the same time, because we never read the sink. */ +#ifdef DUMP_PACKETS + char sink[1024]; +#else + static char sink[BUFSIZ]; +#endif + + if (h->rlen == 0) + return 0; /* move to next state */ + if (h->rbuf) { rbuf = h->rbuf; rlen = h->rlen; } else { - buf = malloc (BUFSIZ); - if (buf == NULL) { - set_error (errno, "malloc"); - return -1; - } - rbuf = buf; - rlen = MIN (h->rlen, BUFSIZ); + rbuf = sink; + rlen = MIN (h->rlen, sizeof sink); } r = h->sock->ops->recv (h, h->sock, rbuf, rlen); if (r == -1) { - free (buf); if (errno == EAGAIN || errno == EWOULDBLOCK) return 1; /* more data */ /* sock->ops->recv called set_error already. */ @@ -78,17 +84,14 @@ recv_into_rbuf (struct nbd_handle *h) } if (r == 0) { set_error (0, "recv: server disconnected unexpectedly"); - free (buf); return -1; } #ifdef DUMP_PACKETS - if (h->rbuf != NULL) - nbd_internal_hexdump (h->rbuf, r, stderr); + nbd_internal_hexdump (rbuf, r, stderr); #endif if (h->rbuf) h->rbuf += r; h->rlen -= r; - free (buf); if (h->rlen == 0) return 0; /* move to next state */ else -- 2.37.1
Eric Blake
2022-Aug-12 02:06 UTC
[Libguestfs] [libnbd PATCH 2/4] generator: Refactor handling of structured replies
Convert an if-else chain into a switch statement. Upcoming patches will add even more recognized reply types, at which point direct dispatch is smarter. No semantic change; the biggest part of the diff is code motion to consolidate the handling of error vs. unknown reply types into a single default label. --- generator/states-reply-structured.c | 77 +++++++++++++++-------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c index b209b41..2159798 100644 --- a/generator/states-reply-structured.c +++ b/generator/states-reply-structured.c @@ -98,32 +98,22 @@ STATE_MACHINE { return 0; } - if (NBD_REPLY_TYPE_IS_ERR (type)) { - if (length < sizeof h->sbuf.sr.payload.error.error) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "too short length in structured reply error"); - return 0; - } - h->rbuf = &h->sbuf.sr.payload.error.error; - h->rlen = sizeof h->sbuf.sr.payload.error.error; - SET_NEXT_STATE (%RECV_ERROR); - return 0; - } - else if (type == NBD_REPLY_TYPE_NONE) { + switch (type) { + case NBD_REPLY_TYPE_NONE: if (length != 0) { SET_NEXT_STATE (%.DEAD); set_error (0, "invalid length in NBD_REPLY_TYPE_NONE"); - return 0; + break; } if (!(flags & NBD_REPLY_FLAG_DONE)) { SET_NEXT_STATE (%.DEAD); set_error (0, "NBD_REPLY_FLAG_DONE must be set in NBD_REPLY_TYPE_NONE"); - return 0; + break; } SET_NEXT_STATE (%FINISH); - return 0; - } - else if (type == NBD_REPLY_TYPE_OFFSET_DATA) { + break; + + case NBD_REPLY_TYPE_OFFSET_DATA: /* The spec states that 0-length requests are unspecified, but * 0-length replies are broken. Still, it's easy enough to support * them as an extension, so we use < instead of <=. @@ -134,56 +124,56 @@ STATE_MACHINE { "cmd->type=%" PRIu16 ", " "this is likely to be a bug in the server", cmd->type); - return 0; + break; } if (length < sizeof h->sbuf.sr.payload.offset_data) { SET_NEXT_STATE (%.DEAD); set_error (0, "too short length in NBD_REPLY_TYPE_OFFSET_DATA"); - return 0; + break; } h->rbuf = &h->sbuf.sr.payload.offset_data; h->rlen = sizeof h->sbuf.sr.payload.offset_data; SET_NEXT_STATE (%RECV_OFFSET_DATA); - return 0; - } - else if (type == NBD_REPLY_TYPE_OFFSET_HOLE) { + break; + + case NBD_REPLY_TYPE_OFFSET_HOLE: if (cmd->type != NBD_CMD_READ) { SET_NEXT_STATE (%.DEAD); set_error (0, "invalid command for receiving offset-hole chunk, " "cmd->type=%" PRIu16 ", " "this is likely to be a bug in the server", cmd->type); - return 0; + break; } if (length != sizeof h->sbuf.sr.payload.offset_hole) { SET_NEXT_STATE (%.DEAD); set_error (0, "invalid length in NBD_REPLY_TYPE_OFFSET_HOLE"); - return 0; + break; } h->rbuf = &h->sbuf.sr.payload.offset_hole; h->rlen = sizeof h->sbuf.sr.payload.offset_hole; SET_NEXT_STATE (%RECV_OFFSET_HOLE); - return 0; - } - else if (type == NBD_REPLY_TYPE_BLOCK_STATUS) { + break; + + case NBD_REPLY_TYPE_BLOCK_STATUS: if (cmd->type != NBD_CMD_BLOCK_STATUS) { SET_NEXT_STATE (%.DEAD); set_error (0, "invalid command for receiving block-status chunk, " "cmd->type=%" PRIu16 ", " "this is likely to be a bug in the server", cmd->type); - return 0; + break; } /* XXX We should be able to skip the bad reply in these two cases. */ if (length < 12 || ((length-4) & 7) != 0) { SET_NEXT_STATE (%.DEAD); set_error (0, "invalid length in NBD_REPLY_TYPE_BLOCK_STATUS"); - return 0; + break; } if (CALLBACK_IS_NULL (cmd->cb.fn.extent)) { SET_NEXT_STATE (%.DEAD); set_error (0, "not expecting NBD_REPLY_TYPE_BLOCK_STATUS here"); - return 0; + break; } /* We read the context ID followed by all the entries into a * single array and deal with it at the end. @@ -193,18 +183,31 @@ STATE_MACHINE { if (h->bs_entries == NULL) { SET_NEXT_STATE (%.DEAD); set_error (errno, "malloc"); - return 0; + break; } h->rbuf = h->bs_entries; h->rlen = length; SET_NEXT_STATE (%RECV_BS_ENTRIES); - return 0; - } - else { - SET_NEXT_STATE (%.DEAD); - set_error (0, "unknown structured reply type (%" PRIu16 ")", type); - return 0; + break; + + default: + if (NBD_REPLY_TYPE_IS_ERR (type)) { + if (length < sizeof h->sbuf.sr.payload.error.error) { + SET_NEXT_STATE (%.DEAD); + set_error (0, "too short length in structured reply error"); + break; + } + h->rbuf = &h->sbuf.sr.payload.error.error; + h->rlen = sizeof h->sbuf.sr.payload.error.error; + SET_NEXT_STATE (%RECV_ERROR); + } + else { + SET_NEXT_STATE (%.DEAD); + set_error (0, "unknown structured reply type (%" PRIu16 ")", type); + } + break; } + return 0; REPLY.STRUCTURED_REPLY.RECV_ERROR: uint32_t length, msglen; -- 2.37.1
Eric Blake
2022-Aug-12 02:06 UTC
[Libguestfs] [libnbd PATCH 3/4] states: Add new RESYNC state for bad structured replies
Instead of killing the connection when the server gives us an unexpected structured reply for a known command cookie, we can just ignore any payload, then mark that particular command as failed with EPROTO and proceed to the next command. For ease of review, this patch just uses the new state for a server that replies with an un-negotiated structured reply to a client normally expecting only simple replies; the next patch will then tweak other error handling spots within structured reply handling that can also utilize this state instead of going to %.DEAD. A compliant server will never trigger this state; but I was able to test it by quickly hacking nbdkit as follows: | diff --git i/server/protocol.c w/server/protocol.c | index 2ac77055..99f1bec1 100644 | --- i/server/protocol.c | +++ w/server/protocol.c | @@ -738,7 +738,7 @@ protocol_recv_request_send_reply (void) | * us from sending human-readable error messages to the client, so | * we should reconsider this in future. | */ | - if (conn->structured_replies && | + if (/*conn->structured_replies &&*/ | (cmd == NBD_CMD_READ || cmd == NBD_CMD_BLOCK_STATUS)) { | if (!error) { | if (cmd == NBD_CMD_READ) With the following command line: $ ./run /path/to/nbdkit --no-sr -U - memory 1M --run 'nbdsh -u "$uri" -c "\ try: h.pread(1, 0) except nbd.Error as ex: print(ex.string) h.flush() print(h.get_size()) "' the pre-patch results show that we killed the connection without reading the server's reply payload: nbd_pread: server sent unexpected structured reply nbdkit: memory.0: error: read request: Connection reset by peer nbdsh: command line script failed: nbd_flush: invalid state: DEAD: the handle must be connected with the server: Invalid argument while post-patch, we are now able to successfully flush despite the intermediate failure to read: nbd_pread: read: command failed: Protocol error 1048576 Running with LIBNBD_DEBUG=1 gives more details about the client's observation of a server bug. --- generator/state_machine.ml | 9 ++++++- generator/states-reply-structured.c | 38 +++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/generator/state_machine.ml b/generator/state_machine.ml index 2cfad54..4d4635f 100644 --- a/generator/state_machine.ml +++ b/generator/state_machine.ml @@ -1,6 +1,6 @@ (* hey emacs, this is OCaml code: -*- tuareg -*- *) (* nbd client library in userspace: state machine definition - * Copyright (C) 2013-2020 Red Hat Inc. + * Copyright (C) 2013-2022 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -871,6 +871,13 @@ and external_events = []; }; + State { + default_state with + name = "RESYNC"; + comment = "Ignore payload of an unexpected structured reply"; + external_events = []; + }; + State { default_state with name = "FINISH"; diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c index 2159798..752468c 100644 --- a/generator/states-reply-structured.c +++ b/generator/states-reply-structured.c @@ -48,11 +48,6 @@ STATE_MACHINE { /* We've only read the simple_reply. The structured_reply is longer, * so read the remaining part. */ - if (!h->structured_replies) { - set_error (0, "server sent unexpected structured reply"); - SET_NEXT_STATE(%.DEAD); - return 0; - } h->rbuf = &h->sbuf; h->rbuf += sizeof h->sbuf.simple_reply; h->rlen = sizeof h->sbuf.sr.structured_reply; @@ -98,6 +93,13 @@ STATE_MACHINE { return 0; } + if (!h->structured_replies) { + h->rbuf = NULL; + h->rlen = length; + SET_NEXT_STATE (%RESYNC); + return 0; + } + switch (type) { case NBD_REPLY_TYPE_NONE: if (length != 0) { @@ -513,6 +515,32 @@ STATE_MACHINE { } return 0; + REPLY.STRUCTURED_REPLY.RESYNC: + struct command *cmd = h->reply_cmd; + uint16_t type; + uint32_t length; + + assert (cmd); + assert (h->rbuf == NULL); + switch (recv_into_rbuf (h)) { + case -1: SET_NEXT_STATE (%.DEAD); return 0; + case 1: + save_reply_state (h); + SET_NEXT_STATE (%.READY); + return 0; + case 0: + type = be16toh (h->sbuf.sr.structured_reply.type); + length = be32toh (h->sbuf.sr.structured_reply.length); + debug (h, "unexpected reply type %u or payload length %" PRIu32 + " for cookie %" PRIu64 " and command %" PRIu32 + ", this is probably a server bug", + type, length, cmd->cookie, cmd->type); + if (cmd->error == 0) + cmd->error = EPROTO; + SET_NEXT_STATE (%FINISH); + return 0; + } + REPLY.STRUCTURED_REPLY.FINISH: uint16_t flags; -- 2.37.1
Eric Blake
2022-Aug-12 02:06 UTC
[Libguestfs] [libnbd PATCH 4/4] states: Use RESYNC to handle more structured reply server bugs
In software, it's always better to be strict in what you produce and lenient in what you accept. Continuing on from the previous patch, there are quite a few situations where we are expecting a particular structured reply, but can still gracefully recover and keep the connection alive if the server sends us something unexpected (either a wrong reply type, or a wrong length to a correct reply type). There are only a few situations left where we really do want a structured reply to move to DEAD which are unrelated to read() errors on the socket itself; those include when the server's payload is so large as to be a denial of service, or if malloc() fails. While touching the code, note that once we check that cmd->type is NBD_CMD_BLOCK_STATUS, we do not also need to check whether cmd->cb.fn.extent is non-null. The handling for NBD_REPLY_TYPE_ERROR_* is interesting: since the error value comes first, even if we don't get a full error packet over the wire, we can prefer the server's errno over EPROTO. But that means error messages should not use RESYNC except when we don't get a full error value. This patch also changes the code to use EPROTO instead of EINVAL if the server mistakenly sends NBD_SUCCESS as its error code. Again, compliant servers will never trip over to the new state; but an easy way to demonstrate this in action is with a temporary patch to nbdkit: | diff --git i/common/protocol/nbd-protocol.h w/common/protocol/nbd-protocol.h | index e5d6404b..1e05d825 100644 | --- i/common/protocol/nbd-protocol.h | +++ w/common/protocol/nbd-protocol.h | @@ -242,7 +242,7 @@ struct nbd_structured_reply_error { | | /* Structured reply types. */ | #define NBD_REPLY_TYPE_NONE 0 | -#define NBD_REPLY_TYPE_OFFSET_DATA 1 | +#define NBD_REPLY_TYPE_OFFSET_DATA 11 | #define NBD_REPLY_TYPE_OFFSET_HOLE 2 | #define NBD_REPLY_TYPE_BLOCK_STATUS 5 | #define NBD_REPLY_TYPE_ERROR NBD_REPLY_TYPE_ERR (1) With the following command line: $ ./run /path/to/nbdkit -U - memory 1M --run 'nbdsh -u "$uri" -c "\ try: h.pread(1, 0) except nbd.Error as ex: print(ex.string) h.flush() print(h.get_size()) "' pre-patch results show the client hanging up abruptly on the server: nbd_pread: unknown structured reply type (11) nbdsh: command line script failed: nbd_flush: invalid state: DEAD: the handle must be connected with the server: Invalid argument nbdkit: memory.1: error: read request: Connection reset by peer while post-patch flags the server error, but allows the next command: nbd_pread: read: command failed: Protocol error 1048576 Reading the LIBNBD_DEBUG=1 log further shows: libnbd: debug: nbdsh: nbd_pread: unexpected reply type 11 or payload length 9 for cookie 1 and command 0, this is probably a server bug --- generator/states-reply-structured.c | 155 +++++++++++----------------- 1 file changed, 61 insertions(+), 94 deletions(-) diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c index 752468c..db32873 100644 --- a/generator/states-reply-structured.c +++ b/generator/states-reply-structured.c @@ -94,6 +94,7 @@ STATE_MACHINE { } if (!h->structured_replies) { + resync: h->rbuf = NULL; h->rlen = length; SET_NEXT_STATE (%RESYNC); @@ -102,16 +103,8 @@ STATE_MACHINE { switch (type) { case NBD_REPLY_TYPE_NONE: - if (length != 0) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid length in NBD_REPLY_TYPE_NONE"); - break; - } - if (!(flags & NBD_REPLY_FLAG_DONE)) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "NBD_REPLY_FLAG_DONE must be set in NBD_REPLY_TYPE_NONE"); - break; - } + if (length != 0 || !(flags & NBD_REPLY_FLAG_DONE)) + goto resync; SET_NEXT_STATE (%FINISH); break; @@ -120,63 +113,28 @@ STATE_MACHINE { * 0-length replies are broken. Still, it's easy enough to support * them as an extension, so we use < instead of <=. */ - if (cmd->type != NBD_CMD_READ) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid command for receiving offset-data chunk, " - "cmd->type=%" PRIu16 ", " - "this is likely to be a bug in the server", - cmd->type); - break; - } - if (length < sizeof h->sbuf.sr.payload.offset_data) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "too short length in NBD_REPLY_TYPE_OFFSET_DATA"); - break; - } + if (cmd->type != NBD_CMD_READ || + length < sizeof h->sbuf.sr.payload.offset_data) + goto resync; h->rbuf = &h->sbuf.sr.payload.offset_data; h->rlen = sizeof h->sbuf.sr.payload.offset_data; SET_NEXT_STATE (%RECV_OFFSET_DATA); break; case NBD_REPLY_TYPE_OFFSET_HOLE: - if (cmd->type != NBD_CMD_READ) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid command for receiving offset-hole chunk, " - "cmd->type=%" PRIu16 ", " - "this is likely to be a bug in the server", - cmd->type); - break; - } - if (length != sizeof h->sbuf.sr.payload.offset_hole) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid length in NBD_REPLY_TYPE_OFFSET_HOLE"); - break; - } + if (cmd->type != NBD_CMD_READ || + length != sizeof h->sbuf.sr.payload.offset_hole) + goto resync; h->rbuf = &h->sbuf.sr.payload.offset_hole; h->rlen = sizeof h->sbuf.sr.payload.offset_hole; SET_NEXT_STATE (%RECV_OFFSET_HOLE); break; case NBD_REPLY_TYPE_BLOCK_STATUS: - if (cmd->type != NBD_CMD_BLOCK_STATUS) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid command for receiving block-status chunk, " - "cmd->type=%" PRIu16 ", " - "this is likely to be a bug in the server", - cmd->type); - break; - } - /* XXX We should be able to skip the bad reply in these two cases. */ - if (length < 12 || ((length-4) & 7) != 0) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid length in NBD_REPLY_TYPE_BLOCK_STATUS"); - break; - } - if (CALLBACK_IS_NULL (cmd->cb.fn.extent)) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "not expecting NBD_REPLY_TYPE_BLOCK_STATUS here"); - break; - } + if (cmd->type != NBD_CMD_BLOCK_STATUS || + length < 12 || ((length-4) & 7) != 0) + goto resync; + assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent)); /* We read the context ID followed by all the entries into a * single array and deal with it at the end. */ @@ -194,25 +152,26 @@ STATE_MACHINE { default: if (NBD_REPLY_TYPE_IS_ERR (type)) { - if (length < sizeof h->sbuf.sr.payload.error.error) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "too short length in structured reply error"); - break; - } + /* Any payload shorter than uint32_t cannot even carry an errno + * value; anything longer, even if it is not long enough to be + * compliant, will favor the wire error over EPROTO during more + * length checks in RECV_ERROR_MESSAGE and RECV_ERROR_TAIL. + */ + if (length < sizeof h->sbuf.sr.payload.error.error.error) + goto resync; h->rbuf = &h->sbuf.sr.payload.error.error; - h->rlen = sizeof h->sbuf.sr.payload.error.error; + h->rlen = MIN (length, sizeof h->sbuf.sr.payload.error.error); SET_NEXT_STATE (%RECV_ERROR); } - else { - SET_NEXT_STATE (%.DEAD); - set_error (0, "unknown structured reply type (%" PRIu16 ")", type); - } + else + goto resync; break; } return 0; REPLY.STRUCTURED_REPLY.RECV_ERROR: - uint32_t length, msglen; + struct command *cmd = h->reply_cmd; + uint32_t length, msglen, error; switch (recv_into_rbuf (h)) { case -1: SET_NEXT_STATE (%.DEAD); return 0; @@ -222,13 +181,25 @@ STATE_MACHINE { return 0; case 0: length = be32toh (h->sbuf.sr.structured_reply.length); + assert (length >= sizeof h->sbuf.sr.payload.error.error.error); + assert (cmd); + + if (length < sizeof h->sbuf.sr.payload.error.error) { + resync: + /* Favor the error packet's errno over RESYNC's EPROTO. */ + error = be32toh (h->sbuf.sr.payload.error.error.error); + if (cmd->error = 0) + cmd->error = nbd_internal_errno_of_nbd_error (error); + h->rbuf = NULL; + h->rlen = length - MIN (length, sizeof h->sbuf.sr.payload.error.error); + SET_NEXT_STATE (%RESYNC); + return 0; + } + msglen = be16toh (h->sbuf.sr.payload.error.error.len); if (msglen > length - sizeof h->sbuf.sr.payload.error.error || - msglen > sizeof h->sbuf.sr.payload.error.msg) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "error message length too large"); - return 0; - } + msglen > sizeof h->sbuf.sr.payload.error.msg) + goto resync; h->rbuf = h->sbuf.sr.payload.error.msg; h->rlen = msglen; @@ -257,24 +228,21 @@ STATE_MACHINE { debug (h, "structured error server message: %.*s", (int) msglen, h->sbuf.sr.payload.error.msg); - /* Special case two specific errors; ignore the tail for all others */ + /* Special case two specific errors; silently ignore tail for all others */ h->rbuf = NULL; h->rlen = length; switch (type) { case NBD_REPLY_TYPE_ERROR: - if (length != 0) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "error payload length too large"); - return 0; - } + if (length != 0) + debug (h, "ignoring unexpected slop after error message, " + "the server may have a bug"); break; case NBD_REPLY_TYPE_ERROR_OFFSET: - if (length != sizeof h->sbuf.sr.payload.error.offset) { - SET_NEXT_STATE (%.DEAD); - set_error (0, "invalid error payload length"); - return 0; - } - h->rbuf = &h->sbuf.sr.payload.error.offset; + if (length == sizeof h->sbuf.sr.payload.error.offset) + debug (h, "unable to safely extract error offset, " + "the server may have a bug"); + else + h->rbuf = &h->sbuf.sr.payload.error.offset; break; } SET_NEXT_STATE (%RECV_ERROR_TAIL); @@ -300,22 +268,19 @@ STATE_MACHINE { assert (cmd); /* guaranteed by CHECK */ /* The spec requires the server to send a non-zero error */ - if (error == NBD_SUCCESS) { - debug (h, "server forgot to set error; using EINVAL"); - error = NBD_EINVAL; - } error = nbd_internal_errno_of_nbd_error (error); + if (error == 0) { + debug (h, "server forgot to set error; using EPROTO"); + error = EPROTO; + } /* Sanity check that any error offset is in range, then invoke - * user callback if present. + * user callback if present. Ignore the offset if it was bogus. */ - if (type == NBD_REPLY_TYPE_ERROR_OFFSET) { + if (type == NBD_REPLY_TYPE_ERROR_OFFSET && h->rbuf) { offset = be64toh (h->sbuf.sr.payload.error.offset); - if (! structured_reply_in_bounds (offset, 0, cmd)) { - SET_NEXT_STATE (%.DEAD); - return 0; - } - if (cmd->type == NBD_CMD_READ && + if (structured_reply_in_bounds (offset, 0, cmd) && + cmd->type == NBD_CMD_READ && CALLBACK_IS_NOT_NULL (cmd->cb.fn.chunk)) { int scratch = error; @@ -330,6 +295,8 @@ STATE_MACHINE { if (cmd->error == 0) cmd->error = scratch; } + else + debug (h, "no use for error offset %" PRIu64, offset); } /* Preserve first error encountered */ -- 2.37.1