Eric Blake
2020-Jul-24 04:01 UTC
[Libguestfs] [libnbd PATCH 0/3] Expose server block size constraints
Necessary when writing a client that wants to avoid unnecessary EINVAL errors from sending unaligned requests. At some point, we may want to add synchronous convenience API wrappers that do request splitting or read-modify-write to obey server constraints while still appearing to the library client as accepting any possible request. But such a wrapper should only be synchronous and not copied to nbd_aio_*, because getting locking right is a bear if you have two non-overlapping client requests that want to visit the same larger-granularity window of the server in parallel. Eric Blake (3): states: Request NBD_INFO_BLOCK_SIZE during NBD_OPT_GO api: Add new API to query negotiated block size nbdinfo: Expose block size constraints info/nbdinfo.pod | 6 +++ lib/internal.h | 7 +++ lib/nbd-protocol.h | 10 ++++- generator/API.ml | 70 +++++++++++++++++++++++++++--- generator/state_machine.ml | 7 +++ generator/states-newstyle-opt-go.c | 31 +++++++++++-- lib/flags.c | 18 +++++++- info/nbdinfo.c | 18 ++++++++ 8 files changed, 156 insertions(+), 11 deletions(-) -- 2.27.0
Eric Blake
2020-Jul-24 04:01 UTC
[Libguestfs] [libnbd PATCH 1/3] states: Request NBD_INFO_BLOCK_SIZE during NBD_OPT_GO
The NBD spec allows servers to refuse NBD_OPT_GO to clients that do not promise to obey block size constraints (see NBD_REP_ERR_BLOCK_SIZE_REQD). Upcoming patches will make libnbd able to remember and honor block sizes, but even when we don't honor them, generally the worst that will happen is the server will send us EINVAL errors on unaligned I/O, which is better than complete failure to connect by not even asking about block sizes. All that is required to ask is to include NBD_INFO_BLOCK_SIZE as a client info in our NBD_OPT_GO. The NBD spec is clear that servers that don't understand client requests should silently ignore the request, so it doesn't hurt to always ask (but if we ever _do_ find a non-compliant server where it matters, or if we wanted to improve unit testing of a server's ability to send the REP_ERR_BLOCK_SIZE_REQD error, we could add a knob to tell libnbd to skip the ask for such servers). For now, we just stash what we learn (if anything) in internal state; the next patch will then add API to expose it to the end user. --- lib/internal.h | 7 +++++++ lib/nbd-protocol.h | 10 +++++++++- generator/state_machine.ml | 7 +++++++ generator/states-newstyle-opt-go.c | 30 ++++++++++++++++++++++++++---- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/internal.h b/lib/internal.h index 2f1727f..1646b01 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -109,6 +109,11 @@ struct nbd_handle { uint64_t exportsize; uint16_t eflags; + /* Server block size constraints, or 0 if not advertised. */ + uint32_t block_minimum; + uint32_t block_preferred; + uint32_t block_maximum; + /* Flags set by the state machine to tell what protocol and whether * TLS was negotiated. */ @@ -168,6 +173,7 @@ struct nbd_handle { char str[NBD_MAX_STRING]; } __attribute__((packed)) server; struct nbd_fixed_new_option_reply_info_export export; + struct nbd_fixed_new_option_reply_info_block_size block_size; struct { struct nbd_fixed_new_option_reply_meta_context context; char str[NBD_MAX_STRING]; @@ -193,6 +199,7 @@ struct nbd_handle { uint32_t cflags; uint32_t len; uint16_t nrinfos; + uint16_t info; uint32_t nrqueries; } sbuf; diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h index 90fdbd2..627bc6e 100644 --- a/lib/nbd-protocol.h +++ b/lib/nbd-protocol.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2013-2019 Red Hat Inc. + * Copyright (C) 2013-2020 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -154,6 +154,14 @@ struct nbd_fixed_new_option_reply_info_export { uint16_t eflags; /* per-export flags */ } NBD_ATTRIBUTE_PACKED; +/* NBD_INFO_BLOCK_SIZE reply (follows fixed_new_option_reply). */ +struct nbd_fixed_new_option_reply_info_block_size { + uint16_t info; /* NBD_INFO_BLOCK_SIZE */ + uint32_t minimum; /* minimum block size */ + uint32_t preferred; /* preferred block size */ + uint32_t maximum; /* maximum block size */ +} NBD_ATTRIBUTE_PACKED; + /* NBD_REP_SERVER reply (follows fixed_new_option_reply). */ struct nbd_fixed_new_option_reply_server { uint32_t export_name_len; /* length of export name */ diff --git a/generator/state_machine.ml b/generator/state_machine.ml index 27b4fa3..144a5f8 100644 --- a/generator/state_machine.ml +++ b/generator/state_machine.ml @@ -542,6 +542,13 @@ and newstyle_opt_go_state_machine = [ external_events = [ NotifyWrite, "" ]; }; + State { + default_state with + name = "SEND_INFO"; + comment = "Send newstyle NBD_OPT_GO request for NBD_INFO_BLOCK_SIZE"; + external_events = [ NotifyWrite, "" ]; + }; + State { default_state with name = "RECV_REPLY"; diff --git a/generator/states-newstyle-opt-go.c b/generator/states-newstyle-opt-go.c index 57ad55a..39406fa 100644 --- a/generator/states-newstyle-opt-go.c +++ b/generator/states-newstyle-opt-go.c @@ -1,5 +1,5 @@ /* nbd client library in userspace: state machine - * Copyright (C) 2013-2019 Red Hat Inc. + * Copyright (C) 2013-2020 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -23,7 +23,8 @@ STATE_MACHINE { h->sbuf.option.version = htobe64 (NBD_NEW_VERSION); h->sbuf.option.option = htobe32 (NBD_OPT_GO); h->sbuf.option.optlen - htobe32 (/* exportnamelen */ 4 + strlen (h->export_name) + /* nrinfos */ 2); + htobe32 (/* exportnamelen */ 4 + strlen (h->export_name) + + /* nrinfos */ 2 + /* INFO_BLOCK_SIZE */ 2); h->wbuf = &h->sbuf; h->wlen = sizeof h->sbuf.option; h->wflags = MSG_MORE; @@ -59,7 +60,7 @@ STATE_MACHINE { switch (send_from_wbuf (h)) { case -1: SET_NEXT_STATE (%.DEAD); return 0; case 0: - h->sbuf.nrinfos = 0; + h->sbuf.nrinfos = htobe16 (1); h->wbuf = &h->sbuf; h->wlen = 2; SET_NEXT_STATE (%SEND_NRINFOS); @@ -67,6 +68,17 @@ STATE_MACHINE { return 0; NEWSTYLE.OPT_GO.SEND_NRINFOS: + switch (send_from_wbuf (h)) { + case -1: SET_NEXT_STATE (%.DEAD); return 0; + case 0: + h->sbuf.info = htobe16 (NBD_INFO_BLOCK_SIZE); + h->wbuf = &h->sbuf; + h->wlen = 2; + SET_NEXT_STATE (%SEND_INFO); + } + return 0; + + NEWSTYLE.OPT_GO.SEND_INFO: switch (send_from_wbuf (h)) { case -1: SET_NEXT_STATE (%.DEAD); return 0; case 0: @@ -130,8 +142,18 @@ STATE_MACHINE { return 0; } break; + case NBD_INFO_BLOCK_SIZE: + if (len != sizeof h->sbuf.or.payload.block_size) { + SET_NEXT_STATE (%.DEAD); + set_error (0, "handshake: incorrect NBD_INFO_BLOCK_SIZE option reply length"); + return 0; + } + h->block_minimum = be32toh (h->sbuf.or.payload.block_size.minimum); + h->block_preferred = be32toh (h->sbuf.or.payload.block_size.preferred); + h->block_maximum = be32toh (h->sbuf.or.payload.block_size.maximum); + break; default: - /* XXX Handle other info types, like NBD_INFO_BLOCK_SIZE */ + /* XXX Handle other info types, like NBD_INFO_DESCRIPTION */ debug (h, "skipping unknown NBD_REP_INFO type %d", be16toh (h->sbuf.or.payload.export.info)); break; -- 2.27.0
Eric Blake
2020-Jul-24 04:01 UTC
[Libguestfs] [libnbd PATCH 2/3] api: Add new API to query negotiated block size
Servers such as qemu can already tell us their preferred block size, and work is underway to let nbdkit do likewise. As this information can prove useful to clients, we need a new API to expose it. The API is designed to support future enum values if the NBD protocol is ever extended further to advertise larger limits for trim, zero, and cache operations. --- generator/API.ml | 70 +++++++++++++++++++++++++++--- generator/states-newstyle-opt-go.c | 1 + lib/flags.c | 18 +++++++- 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/generator/API.ml b/generator/API.ml index 0b6c748..985d715 100644 --- a/generator/API.ml +++ b/generator/API.ml @@ -138,7 +138,15 @@ let tls_enum = { "REQUIRE", 2; ] } -let all_enums = [ tls_enum ] +let block_size_enum = { + enum_prefix = "SIZE"; + enums = [ + "MINIMUM", 0; + "PREFERRED", 1; + "MAXIMUM", 2; + ] +} +let all_enums = [ tls_enum; block_size_enum ] (* Flags. See also Constants below. *) let cmd_flags = { @@ -1227,7 +1235,8 @@ Most modern NBD servers use C<\"newstyle-fixed\">. ^ non_blocking_test_call_description; see_also = [Link "get_handshake_flags"; Link "get_structured_replies_negotiated"; - Link "get_tls_negotiated"]; + Link "get_tls_negotiated"; + Link "get_block_size"]; }; "get_size", { @@ -1242,6 +1251,55 @@ Returns the size in bytes of the NBD export." example = Some "examples/get-size.c"; }; + "get_block_size", { + default_call with + args = [Enum ("size_type", block_size_enum)]; ret = RInt64; + permitted_states = [ Connected; Closed ]; + shortdesc = "return a specific server block size constraint"; + longdesc = "\ +Returns a specific size constraint advertised by the server, if any. If +the return is zero, the server did not advertise a constraint. C<size_type> +must be one of the following constraints: + +=over 4 + +=item C<LIBNBD_SIZE_MINIMUM> = 0 + +If non-zero, this will be a power of 2 between 1 and 64k; any client +request that is not aligned in length or offset to this size is likely +to fail with C<EINVAL>. The image size will generally also be a +multiple of this value (if not, the final few bytes are inaccessible +while obeying alignment constraints). If zero, it is safest to +assume a minimum block size of 512, although many servers support +a minimum block size of 1. + +=item C<LIBNBD_SIZE_PREFERRED> = 1 + +If non-zero, this is a power of 2 representing the preferred size for +efficient I/O. Smaller requests may incur overhead such as +read-modify-write cycles that will not be present when using I/O that +is a multiple of this value. This value may be larger than the size +of the export. If zero, using 4k as a preferred block size tends to +give decent performance. + +=item C<LIBNBD_SIZE_MAXIMUM> = 2 + +If non-zero, this represents the maximum length that the server is +willing to handle during C<nbd_pread> or C<nbd_pwrite>. Other functions +like C<nbd_zero> may still be able to use larger sizes. Note that +this function returns what the server advertised, but libnbd itself +imposes a maximum of 64M. If zero, some NBD servers will abruptly +disconnect if a transaction involves more than 32M. + +=back + +Future NBD extensions may result in additional C<size_type> values. +" +^ non_blocking_test_call_description; + see_also = [Link "get_protocol"; + Link "get_size"] + }; + "pread", { default_call with args = [ BytesOut ("buf", "count"); UInt64 "offset" ]; @@ -1260,7 +1318,8 @@ C<LIBNBD_CMD_FLAG_DF>. The C<flags> parameter must be C<0> for now (it exists for future NBD protocol extensions)."; - see_also = [Link "aio_pread"; Link "pread_structured"]; + see_also = [Link "aio_pread"; Link "pread_structured"; + Link "get_block_size"]; example = Some "examples/fetch-first-sector.c"; }; @@ -1338,7 +1397,7 @@ more than one fragment (if that is supported - some servers cannot do this, see L<nbd_can_df(3)>). Libnbd does not validate that the server actually obeys the flag."; see_also = [Link "can_df"; Link "pread"; - Link "aio_pread_structured"]; + Link "aio_pread_structured"; Link "get_block_size"]; }; "pwrite", { @@ -1361,7 +1420,7 @@ return until the data has been committed to permanent storage (if that is supported - some servers cannot do this, see L<nbd_can_fua(3)>)."; see_also = [Link "can_fua"; Link "is_read_only"; - Link "aio_pwrite"]; + Link "aio_pwrite"; Link "get_block_size"]; example = Some "examples/reads-and-writes.c"; }; @@ -2277,6 +2336,7 @@ let first_version = [ "get_list_exports", (1, 4); "get_nr_list_exports", (1, 4); "get_list_export_name", (1, 4); + "get_block_size", (1, 4); (* These calls are proposed for a future version of libnbd, but * have not been added to any released version so far. diff --git a/generator/states-newstyle-opt-go.c b/generator/states-newstyle-opt-go.c index 39406fa..501144d 100644 --- a/generator/states-newstyle-opt-go.c +++ b/generator/states-newstyle-opt-go.c @@ -151,6 +151,7 @@ STATE_MACHINE { h->block_minimum = be32toh (h->sbuf.or.payload.block_size.minimum); h->block_preferred = be32toh (h->sbuf.or.payload.block_size.preferred); h->block_maximum = be32toh (h->sbuf.or.payload.block_size.maximum); + /* XXX Sanity check that server values are sane... */ break; default: /* XXX Handle other info types, like NBD_INFO_DESCRIPTION */ diff --git a/lib/flags.c b/lib/flags.c index d55d10a..208097b 100644 --- a/lib/flags.c +++ b/lib/flags.c @@ -1,5 +1,5 @@ /* NBD client library in userspace - * Copyright (C) 2013-2019 Red Hat Inc. + * Copyright (C) 2013-2020 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -157,3 +157,19 @@ nbd_unlocked_get_size (struct nbd_handle *h) return h->exportsize; } + +int64_t nbd_unlocked_get_block_size (struct nbd_handle *h, int type) +{ + /* XXX Should we sanitize these, or provide sane defaults when server + * did not? + */ + switch (type) { + case LIBNBD_SIZE_MINIMUM: + return h->block_minimum; + case LIBNBD_SIZE_PREFERRED: + return h->block_preferred; + case LIBNBD_SIZE_MAXIMUM: + return h->block_maximum; + } + return 0; +} -- 2.27.0
Eric Blake
2020-Jul-24 04:01 UTC
[Libguestfs] [libnbd PATCH 3/3] nbdinfo: Expose block size constraints
Now that we have an API, it's time to put it to use. --- info/nbdinfo.pod | 6 ++++++ info/nbdinfo.c | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/info/nbdinfo.pod b/info/nbdinfo.pod index 9761551..d0d20a9 100644 --- a/info/nbdinfo.pod +++ b/info/nbdinfo.pod @@ -45,6 +45,9 @@ L<https://github.com/NetworkBlockDevice/nbd/blob/master/doc/uri.md>): can_multi_conn: true can_trim: true can_zero: true + block_size_minimum: 1 + block_size_preferred: 4096 + block_size_maximum: 33554432 To display only the size in bytes of the NBD export (useful for scripting) use the I<--size> parameter: @@ -73,6 +76,9 @@ the I<--json> parameter: "can_multi_conn": true, "can_trim": false, "can_zero": false, + "block_size_minimum": 1, + "block_size_preferred": 4096, + "block_size_maximum": 33554432, "export-size": 2125119488 } ] diff --git a/info/nbdinfo.c b/info/nbdinfo.c index 455cfc8..4c58815 100644 --- a/info/nbdinfo.c +++ b/info/nbdinfo.c @@ -264,6 +264,7 @@ list_one_export (struct nbd_handle *nbd) int is_rotational, is_read_only; int can_cache, can_df, can_fast_zero, can_flush, can_fua, can_multi_conn, can_trim, can_zero; + int64_t block_minimum, block_preferred, block_maximum; /* Collect the metadata we are going to display. */ size = nbd_get_size (nbd); @@ -288,6 +289,9 @@ list_one_export (struct nbd_handle *nbd) can_multi_conn = nbd_can_multi_conn (nbd); can_trim = nbd_can_trim (nbd); can_zero = nbd_can_zero (nbd); + block_minimum = nbd_get_block_size (nbd, LIBNBD_SIZE_MINIMUM); + block_preferred = nbd_get_block_size (nbd, LIBNBD_SIZE_PREFERRED); + block_maximum = nbd_get_block_size (nbd, LIBNBD_SIZE_MAXIMUM); if (!json_output) { printf ("export="); @@ -317,6 +321,12 @@ list_one_export (struct nbd_handle *nbd) printf ("\t%s: %s\n", "can_trim", can_trim ? "true" : "false"); if (can_zero >= 0) printf ("\t%s: %s\n", "can_zero", can_zero ? "true" : "false"); + if (block_minimum > 0) + printf ("\t%s: %" PRId64 "\n", "block_size_minimum", block_minimum); + if (block_preferred > 0) + printf ("\t%s: %" PRId64 "\n", "block_size_preferred", block_preferred); + if (block_maximum > 0) + printf ("\t%s: %" PRId64 "\n", "block_size_maximum", block_maximum); } else { printf ("\"exports\": [\n"); @@ -363,6 +373,14 @@ list_one_export (struct nbd_handle *nbd) printf ("\t\"%s\": %s,\n", "can_zero", can_zero ? "true" : "false"); + if (block_minimum > 0) + printf ("\t\"%s\": %" PRId64 ",\n", "block_size_minimum", block_minimum); + if (block_preferred > 0) + printf ("\t\"%s\": %" PRId64 ",\n", "block_size_preferred", + block_preferred); + if (block_maximum > 0) + printf ("\t\"%s\": %" PRId64 ",\n", "block_size_maximum", block_maximum); + /* Put this one at the end because of the stupid comma thing in JSON. */ printf ("\t\"export-size\": %" PRIi64 "\n", size); -- 2.27.0
Richard W.M. Jones
2020-Jul-27 09:47 UTC
Re: [Libguestfs] [libnbd PATCH 3/3] nbdinfo: Expose block size constraints
Series looks good, ACK. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Seemingly Similar Threads
- [libnbd PATCH 0/2] Improve nbdinfo display of block constraints
- [libnbd PATCH v2 0/2] opt_list_meta_context
- [libnbd PATCH v2 00/13] Adding nbd_set_opt_mode to improve nbdinfo
- [PATCH libnbd 0/5] info: --map: Coalesce adjacent extents of the same type.
- [libnbd PATCH 0/2] Expose export description