Eric Blake
2023-May-15 19:53 UTC
[Libguestfs] [PATCH v3 14/14] nbd/server: Add FLAG_PAYLOAD support to CMD_BLOCK_STATUS
Allow a client to request a subset of negotiated meta contexts. For example, a client may ask to use a single connection to learn about both block status and dirty bitmaps, but where the dirty bitmap queries only need to be performed on a subset of the disk; forcing the server to compute that information on block status queries in the rest of the disk is wasted effort (both at the server, and on the amount of traffic sent over the wire to be parsed and ignored by the client). Qemu as an NBD client never requests to use more than one meta context, so it has no need to use block status payloads. Testing this instead requires support from libnbd, which CAN access multiple meta contexts in parallel from a single NBD connection; an interop test submitted to the libnbd project at the same time as this patch demonstrates the feature working, as well as testing some corner cases (for example, when the payload length is longer than the export length), although other corner cases (like passing the same id duplicated) requires a protocol fuzzer because libnbd is not wired up to break the protocol that badly. This also includes tweaks to 'qemu-nbd --list' to show when a server is advertising the capability, and to the testsuite to reflect the addition to that output. Signed-off-by: Eric Blake <eblake at redhat.com> --- docs/interop/nbd.txt | 2 +- include/block/nbd.h | 32 ++++-- nbd/server.c | 106 +++++++++++++++++- qemu-nbd.c | 1 + nbd/trace-events | 1 + tests/qemu-iotests/223.out | 12 +- tests/qemu-iotests/307.out | 10 +- .../tests/nbd-qemu-allocation.out | 2 +- 8 files changed, 136 insertions(+), 30 deletions(-) diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index abaf4c28a96..83d85ce8d13 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -69,4 +69,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE NBD_CMD_FLAG_FAST_ZERO * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" * 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports -* 8.1: NBD_OPT_EXTENDED_HEADERS +* 8.1: NBD_OPT_EXTENDED_HEADERS, NBD_FLAG_BLOCK_STATUS_PAYLOAD diff --git a/include/block/nbd.h b/include/block/nbd.h index 6696d61bd59..3d8d7150121 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -175,6 +175,12 @@ typedef struct NBDExtentExt { uint64_t flags; /* NBD_STATE_* */ } QEMU_PACKED NBDExtentExt; +/* Client payload for limiting NBD_CMD_BLOCK_STATUS reply */ +typedef struct NBDBlockStatusPayload { + uint64_t effect_length; + /* uint32_t ids[] follows, array length implied by header */ +} QEMU_PACKED NBDBlockStatusPayload; + /* Transmission (export) flags: sent from server to client during handshake, but describe what will happen during transmission */ enum { @@ -191,20 +197,22 @@ enum { NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ + NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT = 12, /* PAYLOAD flag for BLOCK_STATUS */ }; -#define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) -#define NBD_FLAG_READ_ONLY (1 << NBD_FLAG_READ_ONLY_BIT) -#define NBD_FLAG_SEND_FLUSH (1 << NBD_FLAG_SEND_FLUSH_BIT) -#define NBD_FLAG_SEND_FUA (1 << NBD_FLAG_SEND_FUA_BIT) -#define NBD_FLAG_ROTATIONAL (1 << NBD_FLAG_ROTATIONAL_BIT) -#define NBD_FLAG_SEND_TRIM (1 << NBD_FLAG_SEND_TRIM_BIT) -#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT) -#define NBD_FLAG_SEND_DF (1 << NBD_FLAG_SEND_DF_BIT) -#define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) -#define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) -#define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) -#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) +#define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) +#define NBD_FLAG_READ_ONLY (1 << NBD_FLAG_READ_ONLY_BIT) +#define NBD_FLAG_SEND_FLUSH (1 << NBD_FLAG_SEND_FLUSH_BIT) +#define NBD_FLAG_SEND_FUA (1 << NBD_FLAG_SEND_FUA_BIT) +#define NBD_FLAG_ROTATIONAL (1 << NBD_FLAG_ROTATIONAL_BIT) +#define NBD_FLAG_SEND_TRIM (1 << NBD_FLAG_SEND_TRIM_BIT) +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT) +#define NBD_FLAG_SEND_DF (1 << NBD_FLAG_SEND_DF_BIT) +#define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) +#define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) +#define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) +#define NBD_FLAG_BLOCK_STAT_PAYLOAD (1 << NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT) /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ diff --git a/nbd/server.c b/nbd/server.c index db550c82cd2..ce11285c0d7 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -442,9 +442,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); } -static void nbd_check_meta_export(NBDClient *client) +static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) { - if (client->exp != client->context_exp) { + if (exp != client->context_exp) { client->contexts.count = 0; } } @@ -491,11 +491,15 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, error_setg(errp, "export not found"); return -EINVAL; } + nbd_check_meta_export(client, client->exp); myflags = client->exp->nbdflags; if (client->header_style >= NBD_HEADER_STRUCTURED) { myflags |= NBD_FLAG_SEND_DF; } + if (client->extended_headers && client->contexts.count) { + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; + } trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags); stq_be_p(buf, client->exp->size); stw_be_p(buf + 8, myflags); @@ -508,7 +512,6 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); blk_exp_ref(&client->exp->common); - nbd_check_meta_export(client); return 0; } @@ -628,6 +631,9 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) errp, "export '%s' not present", sane_name); } + if (client->opt == NBD_OPT_GO) { + nbd_check_meta_export(client, exp); + } /* Don't bother sending NBD_INFO_NAME unless client requested it */ if (sendname) { @@ -681,6 +687,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) if (client->header_style >= NBD_HEADER_STRUCTURED) { myflags |= NBD_FLAG_SEND_DF; } + if (client->extended_headers && + (client->contexts.count || client->opt == NBD_OPT_INFO)) { + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; + } trace_nbd_negotiate_new_style_size_flags(exp->size, myflags); stq_be_p(buf, exp->size); stw_be_p(buf + 8, myflags); @@ -716,7 +726,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) client->check_align = check_align; QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); blk_exp_ref(&client->exp->common); - nbd_check_meta_export(client); rc = 1; } return rc; @@ -2415,6 +2424,83 @@ static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, return nbd_co_send_extents(client, request, ea, last, context_id, errp); } +/* + * nbd_co_block_status_payload_read + * Called when a client wants a subset of negotiated contexts via a + * BLOCK_STATUS payload. Check the payload for valid length and + * contents. On success, return 0 with request updated to effective + * length. If request was invalid but payload consumed, return 0 with + * request->len and request->contexts.count set to 0 (which will + * trigger an appropriate NBD_EINVAL response later on). On I/O + * error, return -EIO. + */ +static int +nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request, + Error **errp) +{ + int payload_len = request->len; + g_autofree char *buf = NULL; + g_autofree bool *bitmaps = NULL; + size_t count, i; + uint32_t id; + + assert(request->len <= NBD_MAX_BUFFER_SIZE); + if (payload_len % sizeof(uint32_t) || + payload_len < sizeof(NBDBlockStatusPayload) || + payload_len > (sizeof(NBDBlockStatusPayload) + + sizeof(id) * client->contexts.count)) { + goto skip; + } + + buf = g_malloc(payload_len); + if (nbd_read(client->ioc, buf, payload_len, + "CMD_BLOCK_STATUS data", errp) < 0) { + return -EIO; + } + trace_nbd_co_receive_request_payload_received(request->handle, + payload_len); + memset(&request->contexts, 0, sizeof(request->contexts)); + request->contexts.nr_bitmaps = client->context_exp->nr_export_bitmaps; + bitmaps = g_new0(bool, request->contexts.nr_bitmaps); + count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id); + payload_len = 0; + + for (i = 0; i < count; i++) { + + id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i); + if (id == NBD_META_ID_BASE_ALLOCATION) { + if (request->contexts.base_allocation) { + goto skip; + } + request->contexts.base_allocation = true; + } else if (id == NBD_META_ID_ALLOCATION_DEPTH) { + if (request->contexts.allocation_depth) { + goto skip; + } + request->contexts.allocation_depth = true; + } else { + if (id - NBD_META_ID_DIRTY_BITMAP > + request->contexts.nr_bitmaps || + bitmaps[id - NBD_META_ID_DIRTY_BITMAP]) { + goto skip; + } + bitmaps[id - NBD_META_ID_DIRTY_BITMAP] = true; + } + } + + request->len = ldq_be_p(buf); + request->contexts.count = count; + request->contexts.bitmaps = bitmaps; + bitmaps = NULL; + return 0; + + skip: + trace_nbd_co_receive_block_status_payload_compliance(request->from, + request->len); + request->len = request->contexts.count = 0; + return nbd_drop(client->ioc, payload_len, errp); +} + /* nbd_co_receive_request * Collect a client request. Return 0 if request looks valid, -EIO to drop * connection right away, -EAGAIN to indicate we were interrupted and the @@ -2461,7 +2547,14 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest * if (request->type == NBD_CMD_WRITE || extended_with_payload) { payload_len = request->len; - if (request->type != NBD_CMD_WRITE) { + if (request->type == NBD_CMD_BLOCK_STATUS) { + payload_len = nbd_co_block_status_payload_read(client, + request, + errp); + if (payload_len < 0) { + return -EIO; + } + } else if (request->type != NBD_CMD_WRITE) { /* * For now, we don't support payloads on other * commands; but we can keep the connection alive. @@ -2540,6 +2633,9 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest * valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO; } else if (request->type == NBD_CMD_BLOCK_STATUS) { valid_flags |= NBD_CMD_FLAG_REQ_ONE; + if (client->extended_headers && client->contexts.count) { + valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN; + } } if (request->flags & ~valid_flags) { error_setg(errp, "unsupported flags for command %s (got 0x%x)", diff --git a/qemu-nbd.c b/qemu-nbd.c index 8c35442626a..b7ab0fdc791 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -222,6 +222,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, [NBD_FLAG_SEND_RESIZE_BIT] = "resize", [NBD_FLAG_SEND_CACHE_BIT] = "cache", [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", + [NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT] = "block-status-payload", }; printf(" size: %" PRIu64 "\n", list[i].size); diff --git a/nbd/trace-events b/nbd/trace-events index c20df33a431..da92fe1b56b 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -70,6 +70,7 @@ nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t nbd_co_send_structured_read_hole(uint64_t handle, uint64_t offset, size_t size) "Send structured read hole reply: handle = %" PRIu64 ", offset = %" PRIu64 ", len = %zu" nbd_co_send_extents(uint64_t handle, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: handle = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)" nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'" +nbd_co_receive_block_status_payload_compliance(uint64_t from, int len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%x" nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)" nbd_co_receive_request_payload_received(uint64_t handle, uint64_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu64 nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out index b98582c38ea..b38f0b7963b 100644 --- a/tests/qemu-iotests/223.out +++ b/tests/qemu-iotests/223.out @@ -83,7 +83,7 @@ exports available: 0 exports available: 3 export: 'n' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 @@ -94,7 +94,7 @@ exports available: 3 export: 'n2' description: some text size: 4194304 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 @@ -104,7 +104,7 @@ exports available: 3 qemu:dirty-bitmap:b2 export: 'n3' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 @@ -205,7 +205,7 @@ exports available: 0 exports available: 3 export: 'n' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 @@ -216,7 +216,7 @@ exports available: 3 export: 'n2' description: some text size: 4194304 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 @@ -226,7 +226,7 @@ exports available: 3 qemu:dirty-bitmap:b2 export: 'n3' size: 4194304 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out index 2b9a6a67a1a..f645f3315f8 100644 --- a/tests/qemu-iotests/307.out +++ b/tests/qemu-iotests/307.out @@ -15,7 +15,7 @@ wrote 4096/4096 bytes at offset 0 exports available: 1 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX @@ -44,7 +44,7 @@ exports available: 1 exports available: 1 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX @@ -76,7 +76,7 @@ exports available: 1 exports available: 2 export: 'fmt' size: 67108864 - flags: 0x58f ( readonly flush fua df multi cache ) + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) min block: XXX opt block: XXX max block: XXX @@ -86,7 +86,7 @@ exports available: 2 export: 'export1' description: This is the writable second export size: 67108864 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: XXX opt block: XXX max block: XXX @@ -113,7 +113,7 @@ exports available: 1 export: 'export1' description: This is the writable second export size: 67108864 - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) min block: XXX opt block: XXX max block: XXX diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out index 659276032b0..794d1bfce62 100644 --- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out +++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out @@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 exports available: 1 export: '' size: 4194304 - flags: 0x48f ( readonly flush fua df cache ) + flags: 0x148f ( readonly flush fua df cache block-status-payload ) min block: 1 opt block: 4096 max block: 33554432 -- 2.40.1
Vladimir Sementsov-Ogievskiy
2023-Jun-02 09:13 UTC
[Libguestfs] [PATCH v3 14/14] nbd/server: Add FLAG_PAYLOAD support to CMD_BLOCK_STATUS
On 15.05.23 22:53, Eric Blake wrote:> Allow a client to request a subset of negotiated meta contexts. For > example, a client may ask to use a single connection to learn about > both block status and dirty bitmaps, but where the dirty bitmap > queries only need to be performed on a subset of the disk; forcing the > server to compute that information on block status queries in the rest > of the disk is wasted effort (both at the server, and on the amount of > traffic sent over the wire to be parsed and ignored by the client). > > Qemu as an NBD client never requests to use more than one meta > context, so it has no need to use block status payloads. Testing this > instead requires support from libnbd, which CAN access multiple meta > contexts in parallel from a single NBD connection; an interop test > submitted to the libnbd project at the same time as this patch > demonstrates the feature working, as well as testing some corner cases > (for example, when the payload length is longer than the export > length), although other corner cases (like passing the same id > duplicated) requires a protocol fuzzer because libnbd is not wired up > to break the protocol that badly. > > This also includes tweaks to 'qemu-nbd --list' to show when a server > is advertising the capability, and to the testsuite to reflect the > addition to that output. > > Signed-off-by: Eric Blake <eblake at redhat.com> > --- > docs/interop/nbd.txt | 2 +- > include/block/nbd.h | 32 ++++-- > nbd/server.c | 106 +++++++++++++++++- > qemu-nbd.c | 1 + > nbd/trace-events | 1 + > tests/qemu-iotests/223.out | 12 +- > tests/qemu-iotests/307.out | 10 +- > .../tests/nbd-qemu-allocation.out | 2 +- > 8 files changed, 136 insertions(+), 30 deletions(-) > > diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt > index abaf4c28a96..83d85ce8d13 100644 > --- a/docs/interop/nbd.txt > +++ b/docs/interop/nbd.txt > @@ -69,4 +69,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE > NBD_CMD_FLAG_FAST_ZERO > * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" > * 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports > -* 8.1: NBD_OPT_EXTENDED_HEADERS > +* 8.1: NBD_OPT_EXTENDED_HEADERS, NBD_FLAG_BLOCK_STATUS_PAYLOAD > diff --git a/include/block/nbd.h b/include/block/nbd.h > index 6696d61bd59..3d8d7150121 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -175,6 +175,12 @@ typedef struct NBDExtentExt { > uint64_t flags; /* NBD_STATE_* */ > } QEMU_PACKED NBDExtentExt; > > +/* Client payload for limiting NBD_CMD_BLOCK_STATUS reply */ > +typedef struct NBDBlockStatusPayload { > + uint64_t effect_length; > + /* uint32_t ids[] follows, array length implied by header */ > +} QEMU_PACKED NBDBlockStatusPayload; > + > /* Transmission (export) flags: sent from server to client during handshake, > but describe what will happen during transmission */ > enum { > @@ -191,20 +197,22 @@ enum { > NBD_FLAG_SEND_RESIZE_BIT = 9, /* Send resize */ > NBD_FLAG_SEND_CACHE_BIT = 10, /* Send CACHE (prefetch) */ > NBD_FLAG_SEND_FAST_ZERO_BIT = 11, /* FAST_ZERO flag for WRITE_ZEROES */ > + NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT = 12, /* PAYLOAD flag for BLOCK_STATUS */ > }; > > -#define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) > -#define NBD_FLAG_READ_ONLY (1 << NBD_FLAG_READ_ONLY_BIT) > -#define NBD_FLAG_SEND_FLUSH (1 << NBD_FLAG_SEND_FLUSH_BIT) > -#define NBD_FLAG_SEND_FUA (1 << NBD_FLAG_SEND_FUA_BIT) > -#define NBD_FLAG_ROTATIONAL (1 << NBD_FLAG_ROTATIONAL_BIT) > -#define NBD_FLAG_SEND_TRIM (1 << NBD_FLAG_SEND_TRIM_BIT) > -#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT) > -#define NBD_FLAG_SEND_DF (1 << NBD_FLAG_SEND_DF_BIT) > -#define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) > -#define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) > -#define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) > -#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) > +#define NBD_FLAG_HAS_FLAGS (1 << NBD_FLAG_HAS_FLAGS_BIT) > +#define NBD_FLAG_READ_ONLY (1 << NBD_FLAG_READ_ONLY_BIT) > +#define NBD_FLAG_SEND_FLUSH (1 << NBD_FLAG_SEND_FLUSH_BIT) > +#define NBD_FLAG_SEND_FUA (1 << NBD_FLAG_SEND_FUA_BIT) > +#define NBD_FLAG_ROTATIONAL (1 << NBD_FLAG_ROTATIONAL_BIT) > +#define NBD_FLAG_SEND_TRIM (1 << NBD_FLAG_SEND_TRIM_BIT) > +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT) > +#define NBD_FLAG_SEND_DF (1 << NBD_FLAG_SEND_DF_BIT) > +#define NBD_FLAG_CAN_MULTI_CONN (1 << NBD_FLAG_CAN_MULTI_CONN_BIT) > +#define NBD_FLAG_SEND_RESIZE (1 << NBD_FLAG_SEND_RESIZE_BIT) > +#define NBD_FLAG_SEND_CACHE (1 << NBD_FLAG_SEND_CACHE_BIT) > +#define NBD_FLAG_SEND_FAST_ZERO (1 << NBD_FLAG_SEND_FAST_ZERO_BIT) > +#define NBD_FLAG_BLOCK_STAT_PAYLOAD (1 << NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT) > > /* New-style handshake (global) flags, sent from server to client, and > control what will happen during handshake phase. */ > diff --git a/nbd/server.c b/nbd/server.c > index db550c82cd2..ce11285c0d7 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -442,9 +442,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) > return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); > } > > -static void nbd_check_meta_export(NBDClient *client) > +static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) > { > - if (client->exp != client->context_exp) { > + if (exp != client->context_exp) { > client->contexts.count = 0; > } > } > @@ -491,11 +491,15 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, > error_setg(errp, "export not found"); > return -EINVAL; > } > + nbd_check_meta_export(client, client->exp); > > myflags = client->exp->nbdflags; > if (client->header_style >= NBD_HEADER_STRUCTURED) { > myflags |= NBD_FLAG_SEND_DF; > } > + if (client->extended_headers && client->contexts.count) { > + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; > + } > trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags); > stq_be_p(buf, client->exp->size); > stw_be_p(buf + 8, myflags); > @@ -508,7 +512,6 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, > > QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); > blk_exp_ref(&client->exp->common); > - nbd_check_meta_export(client); > > return 0; > } > @@ -628,6 +631,9 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) > errp, "export '%s' not present", > sane_name); > } > + if (client->opt == NBD_OPT_GO) { > + nbd_check_meta_export(client, exp); > + } > > /* Don't bother sending NBD_INFO_NAME unless client requested it */ > if (sendname) { > @@ -681,6 +687,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) > if (client->header_style >= NBD_HEADER_STRUCTURED) { > myflags |= NBD_FLAG_SEND_DF; > } > + if (client->extended_headers && > + (client->contexts.count || client->opt == NBD_OPT_INFO)) { > + myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD; > + } > trace_nbd_negotiate_new_style_size_flags(exp->size, myflags); > stq_be_p(buf, exp->size); > stw_be_p(buf + 8, myflags); > @@ -716,7 +726,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) > client->check_align = check_align; > QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); > blk_exp_ref(&client->exp->common); > - nbd_check_meta_export(client); > rc = 1; > } > return rc; > @@ -2415,6 +2424,83 @@ static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, > return nbd_co_send_extents(client, request, ea, last, context_id, errp); > } > > +/* > + * nbd_co_block_status_payload_read > + * Called when a client wants a subset of negotiated contexts via a > + * BLOCK_STATUS payload. Check the payload for valid length and > + * contents. On success, return 0 with request updated to effective > + * length. If request was invalid but payload consumed, return 0 with > + * request->len and request->contexts.count set to 0 (which will > + * trigger an appropriate NBD_EINVAL response later on).Hmm. So, it leads to case NBD_CMD_BLOCK_STATUS: if (!request->len) { return nbd_send_generic_reply(client, request, -EINVAL, "need non-zero length", errp); } EINVAL is OK, but "need non-zero length" message is not appropriate.. I think we need separate reply for the case of invalid payload.> On I/O > + * error, return -EIO. > + */ > +static int > +nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request, > + Error **errp) > +{ > + int payload_len = request->len; > + g_autofree char *buf = NULL; > + g_autofree bool *bitmaps = NULL; > + size_t count, i; > + uint32_t id; > + > + assert(request->len <= NBD_MAX_BUFFER_SIZE); > + if (payload_len % sizeof(uint32_t) || > + payload_len < sizeof(NBDBlockStatusPayload) || > + payload_len > (sizeof(NBDBlockStatusPayload) + > + sizeof(id) * client->contexts.count)) { > + goto skip; > + } > + > + buf = g_malloc(payload_len); > + if (nbd_read(client->ioc, buf, payload_len, > + "CMD_BLOCK_STATUS data", errp) < 0) { > + return -EIO; > + } > + trace_nbd_co_receive_request_payload_received(request->handle, > + payload_len); > + memset(&request->contexts, 0, sizeof(request->contexts)); > + request->contexts.nr_bitmaps = client->context_exp->nr_export_bitmaps; > + bitmaps = g_new0(bool, request->contexts.nr_bitmaps); > + count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);In doc we have MUST: "The payload form MUST occupy 8 + n*4 bytes", do we really want to forgive and ignore unaligned tail? May be better to "goto skip" in this case, to avoid ambiguity.> + payload_len = 0; > + > + for (i = 0; i < count; i++) { > + > + id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i); > + if (id == NBD_META_ID_BASE_ALLOCATION) { > + if (request->contexts.base_allocation) { > + goto skip; > + } > + request->contexts.base_allocation = true; > + } else if (id == NBD_META_ID_ALLOCATION_DEPTH) { > + if (request->contexts.allocation_depth) { > + goto skip; > + } > + request->contexts.allocation_depth = true; > + } else { > + if (id - NBD_META_ID_DIRTY_BITMAP > > + request->contexts.nr_bitmaps || > + bitmaps[id - NBD_META_ID_DIRTY_BITMAP]) { > + goto skip; > + } > + bitmaps[id - NBD_META_ID_DIRTY_BITMAP] = true; > + } > + } > + > + request->len = ldq_be_p(buf); > + request->contexts.count = count; > + request->contexts.bitmaps = bitmaps; > + bitmaps = NULL;better I think: request->context.bitmaps = g_steal_pointer(bitmaps); - as a pair to g_autofree.> + return 0; > + > + skip: > + trace_nbd_co_receive_block_status_payload_compliance(request->from, > + request->len); > + request->len = request->contexts.count = 0; > + return nbd_drop(client->ioc, payload_len, errp); > +} > + > /* nbd_co_receive_request > * Collect a client request. Return 0 if request looks valid, -EIO to drop > * connection right away, -EAGAIN to indicate we were interrupted and the > @@ -2461,7 +2547,14 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest * > > if (request->type == NBD_CMD_WRITE || extended_with_payload) { > payload_len = request->len; > - if (request->type != NBD_CMD_WRITE) { > + if (request->type == NBD_CMD_BLOCK_STATUS) { > + payload_len = nbd_co_block_status_payload_read(client, > + request, > + errp); > + if (payload_len < 0) { > + return -EIO; > + }Seems we can handle all payload in one "switch" block, instead of handling BLOCK_STATUS here and postponing WRITE payload (and dropping) up to the end of the block with help of payload_len variable.> + } else if (request->type != NBD_CMD_WRITE) { > /* > * For now, we don't support payloads on other > * commands; but we can keep the connection alive. > @@ -2540,6 +2633,9 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest * > valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO; > } else if (request->type == NBD_CMD_BLOCK_STATUS) { > valid_flags |= NBD_CMD_FLAG_REQ_ONE; > + if (client->extended_headers && client->contexts.count) { > + valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN; > + } > } > if (request->flags & ~valid_flags) { > error_setg(errp, "unsupported flags for command %s (got 0x%x)", > diff --git a/qemu-nbd.c b/qemu-nbd.c > index 8c35442626a..b7ab0fdc791 100644 > --- a/qemu-nbd.c > +++ b/qemu-nbd.c > @@ -222,6 +222,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls, > [NBD_FLAG_SEND_RESIZE_BIT] = "resize", > [NBD_FLAG_SEND_CACHE_BIT] = "cache", > [NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero", > + [NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT] = "block-status-payload", > }; > > printf(" size: %" PRIu64 "\n", list[i].size); > diff --git a/nbd/trace-events b/nbd/trace-events > index c20df33a431..da92fe1b56b 100644 > --- a/nbd/trace-events > +++ b/nbd/trace-events > @@ -70,6 +70,7 @@ nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t > nbd_co_send_structured_read_hole(uint64_t handle, uint64_t offset, size_t size) "Send structured read hole reply: handle = %" PRIu64 ", offset = %" PRIu64 ", len = %zu" > nbd_co_send_extents(uint64_t handle, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: handle = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)" > nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'" > +nbd_co_receive_block_status_payload_compliance(uint64_t from, int len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%x" > nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)" > nbd_co_receive_request_payload_received(uint64_t handle, uint64_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu64 > nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 > diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out > index b98582c38ea..b38f0b7963b 100644 > --- a/tests/qemu-iotests/223.out > +++ b/tests/qemu-iotests/223.out > @@ -83,7 +83,7 @@ exports available: 0 > exports available: 3 > export: 'n' > size: 4194304 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > @@ -94,7 +94,7 @@ exports available: 3 > export: 'n2' > description: some text > size: 4194304 > - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) > + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > @@ -104,7 +104,7 @@ exports available: 3 > qemu:dirty-bitmap:b2 > export: 'n3' > size: 4194304 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > @@ -205,7 +205,7 @@ exports available: 0 > exports available: 3 > export: 'n' > size: 4194304 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > @@ -216,7 +216,7 @@ exports available: 3 > export: 'n2' > description: some text > size: 4194304 > - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) > + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > @@ -226,7 +226,7 @@ exports available: 3 > qemu:dirty-bitmap:b2 > export: 'n3' > size: 4194304 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432 > diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out > index 2b9a6a67a1a..f645f3315f8 100644 > --- a/tests/qemu-iotests/307.out > +++ b/tests/qemu-iotests/307.out > @@ -15,7 +15,7 @@ wrote 4096/4096 bytes at offset 0 > exports available: 1 > export: 'fmt' > size: 67108864 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: XXX > opt block: XXX > max block: XXX > @@ -44,7 +44,7 @@ exports available: 1 > exports available: 1 > export: 'fmt' > size: 67108864 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: XXX > opt block: XXX > max block: XXX > @@ -76,7 +76,7 @@ exports available: 1 > exports available: 2 > export: 'fmt' > size: 67108864 > - flags: 0x58f ( readonly flush fua df multi cache ) > + flags: 0x158f ( readonly flush fua df multi cache block-status-payload ) > min block: XXX > opt block: XXX > max block: XXX > @@ -86,7 +86,7 @@ exports available: 2 > export: 'export1' > description: This is the writable second export > size: 67108864 > - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) > + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) > min block: XXX > opt block: XXX > max block: XXX > @@ -113,7 +113,7 @@ exports available: 1 > export: 'export1' > description: This is the writable second export > size: 67108864 > - flags: 0xded ( flush fua trim zeroes df multi cache fast-zero ) > + flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload ) > min block: XXX > opt block: XXX > max block: XXX > diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out > index 659276032b0..794d1bfce62 100644 > --- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out > +++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out > @@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 > exports available: 1 > export: '' > size: 4194304 > - flags: 0x48f ( readonly flush fua df cache ) > + flags: 0x148f ( readonly flush fua df cache block-status-payload ) > min block: 1 > opt block: 4096 > max block: 33554432-- Best regards, Vladimir