I've mentioned this topic before (in fact, the idea of adding NBD_CMD_FLAG_DF was first mentioned at [1]), but finally finished enough of an implementation to feel confident in posting it. I'd still like to add something under examples/ that uses the new API to implement strict checking of a server's structured replies read implementation (ensure that a server never sends data after an error at the same offset, that it does not send overlapping data, and that it does not report success unless all of the range is covered). [1] https://www.redhat.com/archives/libguestfs/2019-May/msg00209.html Eric Blake (8): states: Add state for structured reply completion states: Consolidate search for current reply's command pread: Reject server SR read response with no data chunks states: Prepare for read callback states: Wire in a read callback states: Add nbd_pread_callback API states: Add tests for nbd_pread_callback states: Add DF flag support for pread .gitignore | 1 + generator/generator | 140 +++++++++++++++++++-- generator/states-reply-simple.c | 31 +++-- generator/states-reply-structured.c | 147 +++++++++++----------- generator/states-reply.c | 31 ++++- interop/Makefile.am | 11 +- interop/structured-read.c | 182 ++++++++++++++++++++++++++++ interop/structured-read.sh | 57 +++++++++ lib/aio.c | 2 + lib/flags.c | 11 ++ lib/internal.h | 22 +++- lib/nbd-protocol.h | 1 + lib/protocol.c | 1 + lib/rw.c | 51 +++++++- python/t/405-pread-callback.py | 42 +++++++ tests/oldstyle.c | 74 ++++++++++- 16 files changed, 686 insertions(+), 118 deletions(-) create mode 100644 interop/structured-read.c create mode 100755 interop/structured-read.sh create mode 100644 python/t/405-pread-callback.py -- 2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 1/8] states: Add state for structured reply completion
Rather than repeating the logic for checking flags at multiple sites,
stick it in a new state.
---
 generator/generator                 |  7 +++++
 generator/states-reply-structured.c | 43 ++++++++++-------------------
 2 files changed, 22 insertions(+), 28 deletions(-)
diff --git a/generator/generator b/generator/generator
index a289741..4c81859 100755
--- a/generator/generator
+++ b/generator/generator
@@ -798,6 +798,13 @@ and structured_reply_state_machine = [
     comment = "Receive a structured reply block-status payload";
     external_events = [ NotifyRead, "" ];
   };
+
+  State {
+    default_state with
+    name = "FINISH";
+    comment = "Finish receiving a structured reply";
+    external_events = [];
+  };
 ]
 (*----------------------------------------------------------------------*)
diff --git a/generator/states-reply-structured.c
b/generator/states-reply-structured.c
index 6337dad..2125e41 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -103,7 +103,7 @@
       set_error (0, "NBD_REPLY_FLAG_DONE must be set in
NBD_REPLY_TYPE_NONE");
       return -1;
     }
-    SET_NEXT_STATE (%^FINISH_COMMAND);
+    SET_NEXT_STATE (%FINISH);
     return 0;
   }
   else if (type == NBD_REPLY_TYPE_OFFSET_DATA) {
@@ -225,7 +225,6 @@
  REPLY.STRUCTURED_REPLY.RECV_ERROR_TAIL:
   struct command_in_flight *cmd;
-  uint16_t flags;
   uint64_t handle;
   uint32_t error;
   uint64_t offset;
@@ -234,7 +233,6 @@
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
-    flags = be16toh (h->sbuf.sr.structured_reply.flags);
     handle = be64toh (h->sbuf.sr.structured_reply.handle);
     error = be32toh (h->sbuf.sr.payload.error.error.error);
     type = be16toh (h->sbuf.sr.structured_reply.type);
@@ -264,10 +262,7 @@
     if (cmd->error == 0)
       cmd->error = nbd_internal_errno_of_nbd_error (error);
-    if (flags & NBD_REPLY_FLAG_DONE)
-      SET_NEXT_STATE (%^FINISH_COMMAND);
-    else
-      SET_NEXT_STATE (%.READY);
+    SET_NEXT_STATE(%FINISH);
   }
   return 0;
@@ -334,24 +329,15 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA_DATA:
-  uint16_t flags;
-
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
-  case 0:
-    flags = be16toh (h->sbuf.sr.structured_reply.flags);
-
-    if (flags & NBD_REPLY_FLAG_DONE)
-      SET_NEXT_STATE (%^FINISH_COMMAND);
-    else
-      SET_NEXT_STATE (%.READY);
+  case 0:  SET_NEXT_STATE (%FINISH);
   }
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
   struct command_in_flight *cmd;
   uint64_t handle;
-  uint16_t flags;
   uint64_t offset;
   uint32_t length;
@@ -359,7 +345,6 @@
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
     handle = be64toh (h->sbuf.sr.structured_reply.handle);
-    flags = be16toh (h->sbuf.sr.structured_reply.flags);
     offset = be64toh (h->sbuf.sr.payload.offset_hole.offset);
     length = be32toh (h->sbuf.sr.payload.offset_hole.length);
@@ -404,17 +389,13 @@
     memset (cmd->data + offset, 0, length);
-    if (flags & NBD_REPLY_FLAG_DONE)
-      SET_NEXT_STATE (%^FINISH_COMMAND);
-    else
-      SET_NEXT_STATE (%.READY);
+    SET_NEXT_STATE(%FINISH);
   }
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:
   struct command_in_flight *cmd;
   uint64_t handle;
-  uint16_t flags;
   uint32_t length;
   size_t i;
   uint32_t context_id;
@@ -424,7 +405,6 @@
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
     handle = be64toh (h->sbuf.sr.structured_reply.handle);
-    flags = be16toh (h->sbuf.sr.structured_reply.flags);
     length = be32toh (h->sbuf.sr.structured_reply.length);
     /* Find the command amongst the commands in flight. */
@@ -470,11 +450,18 @@
                context_id);
     }
-    if (flags & NBD_REPLY_FLAG_DONE)
-      SET_NEXT_STATE (%^FINISH_COMMAND);
-    else
-      SET_NEXT_STATE (%.READY);
+    SET_NEXT_STATE(%FINISH);
   }
   return 0;
+ REPLY.STRUCTURED_REPLY.FINISH:
+  uint16_t flags;
+
+  flags = be16toh (h->sbuf.sr.structured_reply.flags);
+  if (flags & NBD_REPLY_FLAG_DONE)
+    SET_NEXT_STATE (%^FINISH_COMMAND);
+  else
+    SET_NEXT_STATE (%.READY);
+  return 0;
+
 } /* END STATE MACHINE */
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 2/8] states: Consolidate search for current reply's command
No need to have each state recompute which reply is current. This also
consolidates the logic when a reply has an unexpected handle -
previously, we failed for structured (even though the length makes it
easy to recover) and passed for simple (even though there is nothing
on the wire to state if this response is associated with NBD_CMD_READ
if we did not negotiate structured replies, which would allow our
state machine to start parsing arbitrary data as new responses); we're
better off just always moving to DEAD.
---
 generator/states-reply-simple.c     | 15 ++-----
 generator/states-reply-structured.c | 61 ++++++-----------------------
 generator/states-reply.c            | 31 ++++++++++++++-
 lib/internal.h                      |  2 +
 4 files changed, 45 insertions(+), 64 deletions(-)
diff --git a/generator/states-reply-simple.c b/generator/states-reply-simple.c
index 7e5340c..12536e0 100644
--- a/generator/states-reply-simple.c
+++ b/generator/states-reply-simple.c
@@ -20,24 +20,15 @@
 /* STATE MACHINE */ {
  REPLY.SIMPLE_REPLY.START:
-  struct command_in_flight *cmd;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint32_t error;
   uint64_t handle;
   error = be32toh (h->sbuf.simple_reply.error);
   handle = be64toh (h->sbuf.simple_reply.handle);
-  /* Find the command amongst the commands in flight. */
-  for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-    if (cmd->handle == handle)
-      break;
-  }
-  if (cmd == NULL) {
-    SET_NEXT_STATE (%.READY);
-    set_error (0, "no matching handle found for server reply, "
-               "this is probably a bug in the server");
-    return -1;
-  }
+  assert (cmd);
+  assert (cmd->handle == handle);
   if (cmd->type == NBD_CMD_READ && h->structured_replies) {
     set_error (0, "server sent unexpected simple reply for read");
diff --git a/generator/states-reply-structured.c
b/generator/states-reply-structured.c
index 2125e41..9bb165b 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -43,7 +43,7 @@
   return 0;
  REPLY.STRUCTURED_REPLY.CHECK:
-  struct command_in_flight *cmd;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint16_t flags, type;
   uint64_t handle;
   uint32_t length;
@@ -53,20 +53,8 @@
   handle = be64toh (h->sbuf.sr.structured_reply.handle);
   length = be32toh (h->sbuf.sr.structured_reply.length);
-  /* Find the command amongst the commands in flight. */
-  for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-    if (cmd->handle == handle)
-      break;
-  }
-  if (cmd == NULL) {
-    /* Unlike for simple replies, this is difficult to recover from.  We
-     * would need an extra state to read and ignore length bytes. XXX
-     */
-    SET_NEXT_STATE (%.DEAD);
-    set_error (0, "no matching handle found for server reply, "
-               "this is probably a bug in the server");
-    return -1;
-  }
+  assert (cmd);
+  assert (cmd->handle == handle);
   /* Reject a server that replies with too much information, but don't
    * reject a single structured reply to NBD_CMD_READ on the largest
@@ -224,8 +212,7 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_ERROR_TAIL:
-  struct command_in_flight *cmd;
-  uint64_t handle;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint32_t error;
   uint64_t offset;
   uint16_t type;
@@ -233,15 +220,9 @@
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
-    handle = be64toh (h->sbuf.sr.structured_reply.handle);
     error = be32toh (h->sbuf.sr.payload.error.error.error);
     type = be16toh (h->sbuf.sr.structured_reply.type);
-    /* Find the command amongst the commands in flight. */
-    for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-      if (cmd->handle == handle)
-        break;
-    }
     assert (cmd); /* guaranteed by CHECK */
     /* Sanity check that any error offset is in range */
@@ -267,23 +248,16 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA:
-  struct command_in_flight *cmd;
-  uint64_t handle;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint64_t offset;
   uint32_t length;
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
-    handle = be64toh (h->sbuf.sr.structured_reply.handle);
     length = be32toh (h->sbuf.sr.structured_reply.length);
     offset = be64toh (h->sbuf.sr.payload.offset_data.offset);
-    /* Find the command amongst the commands in flight. */
-    for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-      if (cmd->handle == handle)
-        break;
-    }
     assert (cmd); /* guaranteed by CHECK */
     if (cmd->type != NBD_CMD_READ) {
@@ -336,23 +310,16 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE:
-  struct command_in_flight *cmd;
-  uint64_t handle;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint64_t offset;
   uint32_t length;
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
-    handle = be64toh (h->sbuf.sr.structured_reply.handle);
     offset = be64toh (h->sbuf.sr.payload.offset_hole.offset);
     length = be32toh (h->sbuf.sr.payload.offset_hole.length);
-    /* Find the command amongst the commands in flight. */
-    for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-      if (cmd->handle == handle)
-        break;
-    }
     assert (cmd); /* guaranteed by CHECK */
     if (cmd->type != NBD_CMD_READ) {
@@ -394,8 +361,7 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES:
-  struct command_in_flight *cmd;
-  uint64_t handle;
+  struct command_in_flight *cmd = h->reply_cmd;
   uint32_t length;
   size_t i;
   uint32_t context_id;
@@ -404,16 +370,9 @@
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
   case 0:
-    handle = be64toh (h->sbuf.sr.structured_reply.handle);
     length = be32toh (h->sbuf.sr.structured_reply.length);
-    /* Find the command amongst the commands in flight. */
-    for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
-      if (cmd->handle == handle)
-        break;
-    }
-    /* guaranteed by CHECK */
-    assert (cmd);
+    assert (cmd); /* guaranteed by CHECK */
     assert (cmd->extent_fn);
     assert (h->bs_entries);
     assert (length >= 12);
@@ -460,8 +419,10 @@
   flags = be16toh (h->sbuf.sr.structured_reply.flags);
   if (flags & NBD_REPLY_FLAG_DONE)
     SET_NEXT_STATE (%^FINISH_COMMAND);
-  else
+  else {
+    h->reply_cmd = NULL;
     SET_NEXT_STATE (%.READY);
+  }
   return 0;
 } /* END STATE MACHINE */
diff --git a/generator/states-reply.c b/generator/states-reply.c
index f0ef47c..54f98c5 100644
--- a/generator/states-reply.c
+++ b/generator/states-reply.c
@@ -36,6 +36,8 @@
   h->rbuf = &h->sbuf;
   h->rlen = sizeof h->sbuf.simple_reply;
+  assert (h->reply_cmd == NULL);
+
   r = h->sock->ops->recv (h, h->sock, h->rbuf, h->rlen);
   if (r == -1) {
     /* This should never happen because when we enter this state we
@@ -69,16 +71,16 @@
   return 0;
  REPLY.CHECK_SIMPLE_OR_STRUCTURED_REPLY:
+  struct command_in_flight *cmd;
   uint32_t magic;
+  uint64_t handle;
   magic = be32toh (h->sbuf.simple_reply.magic);
   if (magic == NBD_SIMPLE_REPLY_MAGIC) {
     SET_NEXT_STATE (%SIMPLE_REPLY.START);
-    return 0;
   }
   else if (magic == NBD_STRUCTURED_REPLY_MAGIC) {
     SET_NEXT_STATE (%STRUCTURED_REPLY.START);
-    return 0;
   }
   else {
     SET_NEXT_STATE (%.DEAD); /* We've probably lost synchronization. */
@@ -86,6 +88,29 @@
     return -1;
   }
+  /* NB: This works for both simple and structured replies because the
+   * handle is stored at the same offset.
+   */
+  handle = be64toh (h->sbuf.simple_reply.handle);
+  /* Find the command amongst the commands in flight. */
+  for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) {
+    if (cmd->handle == handle)
+      break;
+  }
+  if (cmd == NULL) {
+    /* An unexpected structured reply could be skipped, since it
+     * includes a length; similarly an unexpected simple reply can be
+     * skipped if we assume it was not a read. However, it's more
+     * likely we've lost synchronization with the server.
+     */
+    SET_NEXT_STATE (%.DEAD);
+    set_error (0, "no matching handle found for server reply, "
+               "this is probably a bug in the server");
+    return -1;
+  }
+  h->reply_cmd = cmd;
+  return 0;
+
  REPLY.FINISH_COMMAND:
   struct command_in_flight *prev_cmd, *cmd;
   uint64_t handle;
@@ -102,6 +127,8 @@
       break;
   }
   assert (cmd != NULL);
+  assert (h->reply_cmd == cmd);
+  h->reply_cmd = NULL;
   /* Move it to the end of the cmds_done list. */
   if (prev_cmd != NULL)
diff --git a/lib/internal.h b/lib/internal.h
index 9404d65..6fde06c 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -189,6 +189,8 @@ struct nbd_handle {
    * acknowledge them.
    */
   struct command_in_flight *cmds_to_issue, *cmds_in_flight, *cmds_done;
+  /* Current command during a REPLY cycle */
+  struct command_in_flight *reply_cmd;
 };
 struct meta_context {
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 3/8] pread: Reject server SR read response with no data chunks
The NBD spec requires that a server doing structured reads must not
succeed unless it covers the entire buffer with reply chunks. In the
general case, this requires a lot of bookkeeping to check whether
offsets were non-overlapping and sufficient, and we'd rather defer
such checking to an optional callback function. But a callback
function will only be reached per chunk, while we still want to fail
the overall read if the callback function was never called because the
server erroneously replied with NBD_REPLY_TYPE_NONE with no other
chunks instead of an expected NBD_REPLY_TYPE_ERROR*. For this specific
error case, the bookkeeping is much simpler - we merely track if we've
seen at least one data chunk.
---
 generator/states-reply-simple.c     | 1 +
 generator/states-reply-structured.c | 2 ++
 lib/aio.c                           | 2 ++
 lib/internal.h                      | 1 +
 4 files changed, 6 insertions(+)
diff --git a/generator/states-reply-simple.c b/generator/states-reply-simple.c
index 12536e0..935f6d2 100644
--- a/generator/states-reply-simple.c
+++ b/generator/states-reply-simple.c
@@ -40,6 +40,7 @@
   if (cmd->error == 0 && cmd->type == NBD_CMD_READ) {
     h->rbuf = cmd->data;
     h->rlen = cmd->count;
+    cmd->data_seen = true;
     SET_NEXT_STATE (%RECV_READ_PAYLOAD);
   }
   else {
diff --git a/generator/states-reply-structured.c
b/generator/states-reply-structured.c
index 9bb165b..6740400 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -269,6 +269,7 @@
       return -1;
     }
     assert (cmd->data);
+    cmd->data_seen = true;
     /* Length of the data following. */
     length -= 8;
@@ -331,6 +332,7 @@
       return -1;
     }
     assert (cmd->data);
+    cmd->data_seen = true;
     /* Is the data within bounds? */
     if (offset < cmd->offset) {
diff --git a/lib/aio.c b/lib/aio.c
index 52e8892..7fb0fdf 100644
--- a/lib/aio.c
+++ b/lib/aio.c
@@ -73,6 +73,8 @@ nbd_unlocked_aio_command_completed (struct nbd_handle *h,
   type = cmd->type;
   error = cmd->error;
+  if (type == NBD_CMD_READ && !cmd->data_seen && !error)
+    error = EIO;
   /* Retire it from the list and free it. */
   if (prev_cmd != NULL)
diff --git a/lib/internal.h b/lib/internal.h
index 6fde06c..1f8f789 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -242,6 +242,7 @@ struct command_in_flight {
   uint32_t count;
   void *data; /* Buffer for read/write, opaque for block status */
   extent_fn extent_fn;
+  bool data_seen; /* For read, true if at least one data chunk seen */
   uint32_t error; /* Local errno value */
 };
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 4/8] states: Prepare for read callback
The next patch will add a new 'nbd_aio_pread_callback' function for
allowing the user more immediate access as each chunk of a structured
reply read is received. But before we do that, let's refactor the
command code. This includes a revert of 12843a1a, since the read
callback will need both a buffer and the user's opaque object at the
same time.
---
 generator/states-reply-structured.c |  8 ++++----
 lib/internal.h                      | 17 +++++++++++++----
 lib/rw.c                            |  9 ++++++---
 3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/generator/states-reply-structured.c
b/generator/states-reply-structured.c
index 6740400..657106e 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -123,7 +123,7 @@
       set_error (0, "invalid length in NBD_REPLY_TYPE_BLOCK_STATUS");
       return -1;
     }
-    if (cmd->extent_fn == NULL) {
+    if (cmd->cb.fn.extent == NULL) {
       SET_NEXT_STATE (%.DEAD);
       set_error (0, "not expecting NBD_REPLY_TYPE_BLOCK_STATUS
here");
       return -1;
@@ -375,7 +375,7 @@
     length = be32toh (h->sbuf.sr.structured_reply.length);
     assert (cmd); /* guaranteed by CHECK */
-    assert (cmd->extent_fn);
+    assert (cmd->cb.fn.extent);
     assert (h->bs_entries);
     assert (length >= 12);
@@ -401,8 +401,8 @@
       if (meta_context) {
         /* Call the caller's extent function. */
         errno = 0;
-        if (cmd->extent_fn (cmd->data, meta_context->name,
cmd->offset,
-                            &h->bs_entries[1], (length-4) / 4) == -1)
+        if (cmd->cb.fn.extent (cmd->cb.opaque, meta_context->name,
cmd->offset,
+                               &h->bs_entries[1], (length-4) / 4) == -1)
           cmd->error = errno ? errno : EPROTO;
       }
       else
diff --git a/lib/internal.h b/lib/internal.h
index 1f8f789..cb0e170 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -231,7 +231,16 @@ struct socket {
   const struct socket_ops *ops;
 };
-typedef int (*extent_fn) (void *data, const char *metacontext, uint64_t offset,
uint32_t *entries, size_t nr_entries);
+typedef int (*extent_fn) (void *data, const char *metacontext, uint64_t offset,
+                          uint32_t *entries, size_t nr_entries);
+
+struct command_cb {
+  void *opaque;
+  union {
+    extent_fn extent;
+    /* More to come */
+  } fn;
+};
 struct command_in_flight {
   struct command_in_flight *next;
@@ -240,8 +249,8 @@ struct command_in_flight {
   uint64_t handle;
   uint64_t offset;
   uint32_t count;
-  void *data; /* Buffer for read/write, opaque for block status */
-  extent_fn extent_fn;
+  void *data; /* Buffer for read/write */
+  struct command_cb cb;
   bool data_seen; /* For read, true if at least one data chunk seen */
   uint32_t error; /* Local errno value */
 };
@@ -300,7 +309,7 @@ extern const char *nbd_internal_name_of_nbd_cmd (uint16_t
type);
 extern int64_t nbd_internal_command_common (struct nbd_handle *h,
                                             uint16_t flags, uint16_t type,
                                             uint64_t offset, uint64_t count,
-                                            void *data, extent_fn extent);
+                                            void *data, struct command_cb *cb);
 /* socket.c */
 struct socket *nbd_internal_socket_create (int fd);
diff --git a/lib/rw.c b/lib/rw.c
index ad9c8a0..dc81c57 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -143,7 +143,7 @@ int64_t
 nbd_internal_command_common (struct nbd_handle *h,
                              uint16_t flags, uint16_t type,
                              uint64_t offset, uint64_t count, void *data,
-                             extent_fn extent)
+                             struct command_cb *cb)
 {
   struct command_in_flight *cmd, *prev_cmd;
@@ -183,7 +183,8 @@ nbd_internal_command_common (struct nbd_handle *h,
   cmd->offset = offset;
   cmd->count = count;
   cmd->data = data;
-  cmd->extent_fn = extent;
+  if (cb)
+    cmd->cb = *cb;
   /* If structured replies were negotiated then we trust the server to
    * send back sufficient data to cover the whole buffer.  It's tricky
@@ -360,6 +361,8 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
                                void *data, extent_fn extent,
                                uint32_t flags)
 {
+  struct command_cb cb = { .opaque = data, .fn.extent = extent, };
+
   if (!h->structured_replies) {
     set_error (ENOTSUP, "server does not support structured
replies");
     return -1;
@@ -383,5 +386,5 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h,
   }
   return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset,
-                                      count, data, extent);
+                                      count, NULL, &cb);
 }
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 5/8] states: Wire in a read callback
When a server supports structured reads, knowing where the holes are
in the responses can aid the client's behavior.  Furthermore, since
the server can send data out of order or interleaved with other
replies, a client may be able to start operating on the partial
results as soon as they are available rather than waiting for the
entire buffer to be reconstructed.  As such, it makes sense to have an
optional callback invoked each time we finish reading a chunk (for
simple replies, we behave as if everything was a single data chunk).
This also requires special handling of NBD_REPLY_TYPE_ERROR_OFFSET, as
that is the one situation where we want to call the callback to inform
them that the server pointed out a specific offset for the error; all
other errors continue to affect only the final pread return value
without any use of the callback.  If any callback fails, and if no
prior error was set, then the callback's failure becomes the failure
reason for the overall read.
Nothing actually passes a callback function yet, so for now this is no
functional change; but this will make it possible for the next patch
to add an 'nbd_aio_pread_callback' API.
---
 generator/generator                 |  4 +++
 generator/states-reply-simple.c     | 15 +++++++++-
 generator/states-reply-structured.c | 43 +++++++++++++++++++++++++++--
 lib/internal.h                      |  4 ++-
 4 files changed, 62 insertions(+), 4 deletions(-)
diff --git a/generator/generator b/generator/generator
index 4c81859..2614689 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1934,6 +1934,10 @@ let constants = [
   "CMD_FLAG_FUA",        1 lsl 0;
   "CMD_FLAG_NO_HOLE",    1 lsl 1;
   "CMD_FLAG_REQ_ONE",    1 lsl 3;
+
+  "READ_DATA",           1;
+  "READ_HOLE",           2;
+  "READ_ERROR",          3;
 ]
 (*----------------------------------------------------------------------*)
diff --git a/generator/states-reply-simple.c b/generator/states-reply-simple.c
index 935f6d2..ddc91ce 100644
--- a/generator/states-reply-simple.c
+++ b/generator/states-reply-simple.c
@@ -49,9 +49,22 @@
   return 0;
  REPLY.SIMPLE_REPLY.RECV_READ_PAYLOAD:
+  struct command_in_flight *cmd = h->reply_cmd;
+
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
-  case 0:  SET_NEXT_STATE (%^FINISH_COMMAND);
+  case 0:
+    /* guaranteed by START */
+    assert (cmd);
+    if (cmd->cb.fn.read) {
+      assert (cmd->error == 0);
+      errno = 0;
+      if (cmd->cb.fn.read (cmd->cb.opaque, cmd->data, cmd->count,
+                           cmd->offset, LIBNBD_READ_DATA) == -1)
+        cmd->error = errno ? errno : EPROTO;
+    }
+
+    SET_NEXT_STATE (%^FINISH_COMMAND);
   }
   return 0;
diff --git a/generator/states-reply-structured.c
b/generator/states-reply-structured.c
index 657106e..00659de 100644
--- a/generator/states-reply-structured.c
+++ b/generator/states-reply-structured.c
@@ -225,7 +225,9 @@
     assert (cmd); /* guaranteed by CHECK */
-    /* Sanity check that any error offset is in range */
+    /* Sanity check that any error offset is in range, then invoke
+     * user callback if present.
+     */
     if (type == NBD_REPLY_TYPE_ERROR_OFFSET) {
       offset = be64toh (h->sbuf.sr.payload.error.offset);
       if (offset < cmd->offset || offset >= cmd->offset +
cmd->count) {
@@ -237,6 +239,18 @@
                    offset, cmd->offset, cmd->count);
         return -1;
       }
+      if (cmd->cb.fn.read) {
+        /* Different from successful reads: if the callback clears
+         * errno but still fails, then use the server's error below.
+         */
+        errno = nbd_internal_errno_of_nbd_error (error);
+        set_error (errno, "server reported read failure at offset
0x%" PRIx64,
+                   offset);
+        if (cmd->cb.fn.read (cmd->cb.opaque, cmd->data + (offset -
cmd->offset),
+                             0, offset, LIBNBD_READ_ERROR) == -1)
+          if (cmd->error == 0 && errno)
+            cmd->error = errno;
+      }
     }
     /* Preserve first error encountered */
@@ -304,9 +318,27 @@
   return 0;
  REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA_DATA:
+  struct command_in_flight *cmd = h->reply_cmd;
+  uint64_t offset;
+  uint32_t length;
+
   switch (recv_into_rbuf (h)) {
   case -1: SET_NEXT_STATE (%.DEAD); return -1;
-  case 0:  SET_NEXT_STATE (%FINISH);
+  case 0:
+    length = be32toh (h->sbuf.sr.structured_reply.length);
+    offset = be64toh (h->sbuf.sr.payload.offset_data.offset);
+
+    assert (cmd); /* guaranteed by CHECK */
+    if (cmd->cb.fn.read) {
+      errno = 0;
+      if (cmd->cb.fn.read (cmd->cb.opaque, cmd->data + (offset -
cmd->offset),
+                           length - sizeof offset, offset,
+                           LIBNBD_READ_DATA) == -1)
+        if (cmd->error == 0)
+          cmd->error = errno ? errno : EPROTO;
+    }
+
+    SET_NEXT_STATE (%FINISH);
   }
   return 0;
@@ -357,6 +389,13 @@
     }
     memset (cmd->data + offset, 0, length);
+    if (cmd->cb.fn.read) {
+      errno = 0;
+      if (cmd->cb.fn.read (cmd->cb.opaque, cmd->data + offset, length,
+                           cmd->offset + offset, LIBNBD_READ_HOLE) == -1)
+        if (cmd->error == 0)
+          cmd->error = errno ? errno : EPROTO;
+    }
     SET_NEXT_STATE(%FINISH);
   }
diff --git a/lib/internal.h b/lib/internal.h
index cb0e170..a1e27df 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -233,12 +233,14 @@ struct socket {
 typedef int (*extent_fn) (void *data, const char *metacontext, uint64_t offset,
                           uint32_t *entries, size_t nr_entries);
+typedef int (*read_fn) (void *data, const void *buf, size_t count,
+                        uint64_t offset, int status);
 struct command_cb {
   void *opaque;
   union {
     extent_fn extent;
-    /* More to come */
+    read_fn read;
   } fn;
 };
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 6/8] states: Add nbd_pread_callback API
Time to wire up user access to react to structured reads as the chunks
come in, exposing the framework added in the last patch.
I chose to use an int for the callback status rather than an enum for
ease of wiring things up to the various language bindings.  Still, it
required quite a few tweaks to the generator to support an Int type in
a callback.
It's easy to test callback behavior for LIBNBD_READ_DATA/HOLE (the
next patch will add interop tests with qemu-nbd), but at present, I
don't know of any server that responds with
NBD_REPLY_TYPE_ERROR_OFFSET, which in turn makes testing
LIBNBD_READ_ERROR difficult. I used the following hack on top of
qemu.git commit d1bf88e5 to trigger an offset error for local testing:
| diff --git i/nbd/server.c w/nbd/server.c
| index 10faedcfc55d..804dced0bc8d 100644
| --- i/nbd/server.c
| +++ w/nbd/server.c
| @@ -1838,12 +1838,30 @@ static int coroutine_fn
nbd_co_send_sparse_read(NBDClient *client,
|
|              trace_nbd_co_send_structured_read_hole(handle, offset + progress,
|                                                     pnum);
| -            set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
| +            set_be_chunk(&chunk.h, 0,
|                           NBD_REPLY_TYPE_OFFSET_HOLE,
|                           handle, sizeof(chunk) - sizeof(chunk.h));
|              stq_be_p(&chunk.offset, offset + progress);
|              stl_be_p(&chunk.length, pnum);
|              ret = nbd_co_send_iov(client, iov, 1, errp);
| +            if (ret == 0) {
| +                NBDStructuredError chunk;
| +                int64_t off;
| +                struct iovec iov[] = {
| +                    {.iov_base = &chunk, .iov_len = sizeof(chunk)},
| +                    {.iov_base = (char*)"MSG", .iov_len = 3},
| +                    {.iov_base = &off, .iov_len = sizeof(off)},
| +                };
| +
| +                set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
| +                             NBD_REPLY_TYPE_ERROR_OFFSET,
| +                             handle, sizeof(chunk) - sizeof(chunk.h) +
| +                             3 + sizeof(off));
| +                stl_be_p(&chunk.error, NBD_EPERM);
| +                stw_be_p(&chunk.message_length, 3);
| +                stq_be_p(&off, offset + progress);
| +                ret = nbd_co_send_iov(client, iov, 3, errp);
| +            }
|          } else {
|              ret = blk_pread(exp->blk, offset + progress +
exp->dev_offset,
|                              data + progress, pnum);
---
 generator/generator | 113 +++++++++++++++++++++++++++++++++++++++-----
 lib/rw.c            |  32 +++++++++++++
 2 files changed, 134 insertions(+), 11 deletions(-)
diff --git a/generator/generator b/generator/generator
index 2614689..ce77f17 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1305,7 +1305,72 @@ Issue a read command to the NBD server for the range
starting
 at C<offset> and ending at C<offset> + C<count> - 1.  NBD
 can only read all or nothing using this call.  The call
 returns when the data has been read fully into C<buf> or there is an
-error.
+error.  See also C<nbd_pread_callback>, if finer visibility is
+required into the server's replies.
+
+The C<flags> parameter must be C<0> for now (it exists for future
NBD
+protocol extensions).";
+  };
+
+  "pread_callback", {
+    default_call with
+    args = [ BytesOut ("buf", "count"); UInt64
"offset";
+             Opaque "data";
+             Callback ("chunk", [Opaque "data";
+                                 BytesIn ("buf", "count");
UInt64 "offset";
+                                 Int "status";]);
+             Flags "flags" ];
+    ret = RErr;
+    permitted_states = [ Connected ];
+    shortdesc = "read from the NBD server";
+    longdesc = "\
+Issue a read command to the NBD server for the range starting
+at C<offset> and ending at C<offset> + C<count> - 1.  The
server's
+response may be subdivided into chunks which may arrive out of order
+before reassembly into the original buffer; the C<chunk> callback
+is used for notification after each chunk arrives, and may perform
+additional sanity checking on the server's reply. The callback cannot
+call C<nbd_*> APIs on the same handle since it holds the handle lock
+and will cause a deadlock.  If the callback returns C<-1>, and no
+earlier error has been detected, then the overall read command will
+fail with the same value of C<errno> left after the failing callback;
+but any further chunks will still invoke the callback.
+
+The C<chunk> function is called once per chunk of data received.
+The C<buf> and C<count> parameters represent the subset of the
original
+buffer which has just been populated by results from the server. The
+C<offset> parameter represents the absolute offset at which C<buf>
+begins within the image (note that this is not the relative offset
+of C<buf> within the original buffer). The C<status> parameter is
+one of
+
+=over 4
+
+=item C<LIBNBD_READ_DATA> = 1
+
+C<buf> was populated with C<count> bytes of data.
+
+=item C<LIBNBD_READ_HOLE> = 2
+
+C<buf> represents a hole, and contains C<count> NUL bytes.
+
+=item C<LIBNBD_READ_ERR> = 3
+
+C<count> is 0, and C<buf> represents the offset of where an error
+occurred. The error is visible in C<errno> or by calling
+C<nbd_get_errno>.
+
+=back
+
+It is possible for the C<chunk> function to be called more times than
+you expect (if the server is buggy). It is also possible that the
+C<chunk> function is not called at all (in particular,
+C<LIBNBD_READ_ERROR> is used only when an error is associated with a
+particular offset), but you are guaranteed that the callback was
+called at least once if the overall read succeeds. Libnbd does not
+validate that the server obeyed the requirement that a read call must
+not have overlapping chunks and must not succeed without enough chunks
+to cover the entire request.
 The C<flags> parameter must be C<0> for now (it exists for future
NBD
 protocol extensions).";
@@ -1591,6 +1656,26 @@ C<buf> is valid until the command has completed. 
Other
 parameters behave as documented in C<nbd_pread>.";
   };
+  "aio_pread_callback", {
+    default_call with
+    args = [ BytesPersistOut ("buf", "count"); UInt64
"offset";
+             Opaque "data";
+             CallbackPersist ("chunk", [Opaque "data";
+                                        BytesIn ("buf",
"count");
+                                        UInt64 "offset";
+                                        Int "status";]);
+             Flags "flags" ];
+    ret = RInt64;
+    permitted_states = [ Connected ];
+    shortdesc = "read from the NBD server";
+    longdesc = "\
+Issue a read command to the NBD server.  This returns the
+unique positive 64 bit handle for this command, or C<-1> on
+error.  To check if the command completed, call
+C<nbd_aio_command_completed>.  Parameters behave as documented
+in C<nbd_pread_callback>.";
+  };
+
   "aio_pwrite", {
     default_call with
     args = [ BytesPersistIn ("buf", "count"); UInt64
"offset"; Flags "flags" ];
@@ -3264,7 +3349,8 @@ let print_python_binding name { args; ret }             
pr "  PyObject *py_%s = PyList_New (%s);\n" n len;
             pr "  for (size_t i = 0; i < %s; ++i)\n" len;
             pr "    PyList_SET_ITEM (py_%s, i, PyLong_FromUnsignedLong
(%s[i]));\n" n n
-         | BytesIn _ -> ()
+         | BytesIn _
+         | Int _ -> ()
          | Opaque n ->
             pr "  struct %s_%s_data *_data = %s;\n" name cb_name n
          | String n
@@ -3272,7 +3358,7 @@ let print_python_binding name { args; ret }           (*
The following not yet implemented for callbacks XXX *)
          | ArrayAndLen _ | Bool _ | BytesOut _
          | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist
_
-         | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
+         | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
          | UInt _ | UInt32 _ -> assert false
        ) args;
        pr "\n";
@@ -3282,13 +3368,14 @@ let print_python_binding name { args; ret }          
function
          | ArrayAndLen (UInt32 n, len) -> pr " \"O\""
          | BytesIn (n, len) -> pr " \"y#\""
+         | Int n -> pr " \"i\""
          | Opaque n -> pr " \"O\""
          | String n -> pr " \"s\""
          | UInt64 n -> pr " \"K\""
          (* The following not yet implemented for callbacks XXX *)
          | ArrayAndLen _ | Bool _ | BytesOut _
          | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist
_
-         | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
+         | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
          | UInt _ | UInt32 _ -> assert false
        ) args;
        pr " \")\"";
@@ -3297,12 +3384,13 @@ let print_python_binding name { args; ret }           |
ArrayAndLen (UInt32 n, _) -> pr ", py_%s" n
          | BytesIn (n, len) -> pr ", %s, (int) %s" n len
          | Opaque _ -> pr ", _data->data"
+         | Int n
          | String n
          | UInt64 n -> pr ", %s" n
          (* The following not yet implemented for callbacks XXX *)
          | ArrayAndLen _ | Bool _ | BytesOut _
          | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist
_
-         | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
+         | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
          | UInt _ | UInt32 _ -> assert false
        ) args;
        pr ");\n";
@@ -3332,13 +3420,14 @@ let print_python_binding name { args; ret }           |
ArrayAndLen (UInt32 n, _) ->
             pr "  Py_DECREF (py_%s);\n" n
          | BytesIn _
+         | Int _
+         | Opaque _
          | String _
-         | UInt64 _
-         | Opaque _ -> ()
+         | UInt64 _ -> ()
          (* The following not yet implemented for callbacks XXX *)
          | ArrayAndLen _ | Bool _ | BytesOut _
          | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist
_
-         | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
+         | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
          | UInt _ | UInt32 _ -> assert false
        ) args;
        pr "  return ret;\n";
@@ -3989,13 +4078,13 @@ let print_ocaml_binding (name, { args; ret })          
List.map (
            function
            | ArrayAndLen (UInt32 n, _) | BytesIn (n, _)
-           | String n | UInt64 n | Opaque n ->
+           | Int n | Opaque n | String n | UInt64 n ->
               n ^ "v"
            (* The following not yet implemented for callbacks XXX *)
            | ArrayAndLen _ | Bool _ | BytesOut _
            | BytesPersistIn _ | BytesPersistOut _
            | Callback _ | CallbackPersist _
-           | Flags _ | Int _ | Int64 _ | Path _
+           | Flags _ | Int64 _ | Path _
            | SockAddrAndLen _ | StringList _
            | UInt _ | UInt32 _ -> assert false
          ) args in
@@ -4021,6 +4110,8 @@ let print_ocaml_binding (name, { args; ret })           |
BytesIn (n, len) ->
             pr "  %sv = caml_alloc_string (%s);\n" n len;
             pr "  memcpy (String_val (%sv), %s, %s);\n" n n len
+         | Int n ->
+            pr "  %sv = caml_copy_int32 (%s);\n" n n
          | String n ->
             pr "  %sv = caml_copy_string (%s);\n" n n
          | UInt64 n ->
@@ -4032,7 +4123,7 @@ let print_ocaml_binding (name, { args; ret })           (*
The following not yet implemented for callbacks XXX *)
          | ArrayAndLen _ | Bool _ | BytesOut _
          | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist
_
-         | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
+         | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _
          | UInt _ | UInt32 _ -> assert false
        ) args;
diff --git a/lib/rw.c b/lib/rw.c
index dc81c57..669987e 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -55,6 +55,22 @@ nbd_unlocked_pread (struct nbd_handle *h, void *buf,
   return wait_for_command (h, ch);
 }
+/* Issue a read command with callbacks and wait for the reply. */
+int
+nbd_unlocked_pread_callback (struct nbd_handle *h, void *buf,
+                             size_t count, uint64_t offset,
+                             void *opaque, read_fn read, uint32_t flags)
+{
+  int64_t ch;
+
+  ch = nbd_unlocked_aio_pread_callback (h, buf, count, offset,
+                                        opaque, read, flags);
+  if (ch == -1)
+    return -1;
+
+  return wait_for_command (h, ch);
+}
+
 /* Issue a write command and wait for the reply. */
 int
 nbd_unlocked_pwrite (struct nbd_handle *h, const void *buf,
@@ -231,6 +247,22 @@ nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
                                       buf, NULL);
 }
+int64_t
+nbd_unlocked_aio_pread_callback (struct nbd_handle *h, void *buf,
+                                 size_t count, uint64_t offset,
+                                 void *opaque, read_fn read, uint32_t flags)
+{
+  struct command_cb cb = { .opaque = opaque, .fn.read = read, };
+
+  if (flags != 0) {
+    set_error (EINVAL, "invalid flag: %" PRIu32, flags);
+    return -1;
+  }
+
+  return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+                                      buf, &cb);
+}
+
 int64_t
 nbd_unlocked_aio_pwrite (struct nbd_handle *h, const void *buf,
                          size_t count, uint64_t offset,
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 7/8] states: Add tests for nbd_pread_callback
With a new enough qemu-nbd and a file system that supports a hole, we
can test that nbd_pread_callback for sane operation with
interop/structured-read.
It is also possible to test that the callback behaves sanely even for
a connection lacks structured replies, using nbdkit in tests/oldstyle.
---
 .gitignore                     |   1 +
 interop/Makefile.am            |  11 ++-
 interop/structured-read.c      | 165 +++++++++++++++++++++++++++++++++
 interop/structured-read.sh     |  57 ++++++++++++
 python/t/405-pread-callback.py |  36 +++++++
 tests/oldstyle.c               |  74 ++++++++++++++-
 6 files changed, 339 insertions(+), 5 deletions(-)
 create mode 100644 interop/structured-read.c
 create mode 100755 interop/structured-read.sh
 create mode 100644 python/t/405-pread-callback.py
diff --git a/.gitignore b/.gitignore
index 30438c1..ea496ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@ Makefile.in
 /interop/interop-qemu-nbd
 /interop/interop-qemu-nbd-tls-certs
 /interop/interop-qemu-nbd-tls-psk
+/interop/structured-read
 /lib/api.c
 /lib/libnbd.pc
 /lib/libnbd.syms
diff --git a/interop/Makefile.am b/interop/Makefile.am
index eb4b52b..6e1156d 100644
--- a/interop/Makefile.am
+++ b/interop/Makefile.am
@@ -47,10 +47,12 @@ check_PROGRAMS += \
 	interop-qemu-nbd \
 	interop-qemu-nbd-tls-certs \
 	interop-qemu-nbd-tls-psk \
-	dirty-bitmap
+	dirty-bitmap \
+	structured-read
 TESTS += \
 	interop-qemu-nbd \
-	dirty-bitmap.sh
+	dirty-bitmap.sh \
+	structured-read.sh
 # tls tests assume the pre-existence of files created in ../tests/Makefile.am,
 # so we can only run them under the same conditions used there
@@ -107,6 +109,11 @@ dirty_bitmap_CPPFLAGS = -I$(top_srcdir)/include
 dirty_bitmap_CFLAGS = $(WARNINGS_CFLAGS)
 dirty_bitmap_LDADD = $(top_builddir)/lib/libnbd.la
+structured_read_SOURCES = structured-read.c
+structured_read_CPPFLAGS = -I$(top_srcdir)/include
+structured_read_CFLAGS = $(WARNINGS_CFLAGS)
+structured_read_LDADD = $(top_builddir)/lib/libnbd.la
+
 endif HAVE_QEMU_NBD
 check-valgrind:
diff --git a/interop/structured-read.c b/interop/structured-read.c
new file mode 100644
index 0000000..b740e98
--- /dev/null
+++ b/interop/structured-read.c
@@ -0,0 +1,165 @@
+/* NBD client library in userspace
+ * Copyright (C) 2013-2019 Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Test structured reply read callback. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include <libnbd.h>
+
+static const char *unixsocket;
+
+/* Depends on structured-read.sh setting things up so that qemu-nbd
+ * exposes an image with a 512-byte hole at offset 2048 followed by a
+ * 512-byte data section containing all '1' bytes at offset 2560
+ * (non-zero offsets to test that everything is calculated correctly).
+ */
+static char rbuf[1024];
+
+struct data {
+  //XXX  bool df;    /* input: true if DF flag was passed to request */
+  int count;       /* input: count of expected remaining calls */
+  bool fail;       /* input: true to return failure */
+  bool seen_hole;  /* output: true if hole encountered */
+  bool seen_data;  /* output: true if data encountered */
+};
+
+static int
+read_cb (void *opaque, const void *bufv, size_t count, uint64_t offset,
+         int status)
+{
+  struct data *data = opaque;
+  const char *buf = bufv;
+
+  /* The NBD spec allows chunks to be reordered; we are relying on the
+   * fact that qemu-nbd does not do so.
+   */
+  assert (data->count-- > 0);
+
+  switch (status) {
+  case LIBNBD_READ_DATA:
+    // XXX if (df...)
+    assert (buf == rbuf + 512);
+    assert (count == 512);
+    assert (offset == 2048 + 512);
+    assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    assert (!data->seen_data);
+    data->seen_data = true;
+    break;
+  case LIBNBD_READ_HOLE:
+    assert (buf == rbuf);
+    assert (count == 512);
+    assert (offset == 2048);
+    assert (buf[0] == 0 && memcmp (buf, buf + 1, 511) == 0);
+    assert (!data->seen_hole);
+    data->seen_hole = true;
+    break;
+  case LIBNBD_READ_ERROR:
+    /* For now, qemu-nbd cannot provoke this status. */
+  default:
+    assert (false);
+  }
+
+  if (data->fail) {
+    /* Something NBD servers can't send */
+    errno = data->count == 1 ? EPROTO : ECONNREFUSED;
+    return -1;
+  }
+  return 0;
+}
+
+int
+main (int argc, char *argv[])
+{
+  struct nbd_handle *nbd;
+  int64_t exportsize;
+  struct data data;
+  char c;
+
+  if (argc != 2) {
+    fprintf (stderr, "%s unixsocket\n", argv[0]);
+    exit (EXIT_FAILURE);
+  }
+  unixsocket = argv[1];
+
+  nbd = nbd_create ();
+  if (nbd == NULL) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+
+  if (nbd_connect_unix (nbd, unixsocket) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+
+  exportsize = nbd_get_size (nbd);
+  if (exportsize == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  if (exportsize != 3072) {
+    fprintf (stderr, "unexpected file size\n");
+    exit (EXIT_FAILURE);
+  }
+
+  memset (rbuf, 2, sizeof rbuf);
+  data = (struct data) { .count = 2, };
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+                          0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (data.seen_data && data.seen_hole);
+
+  // XXX Repeat with DF flag
+
+  /* Trigger a failed callback, to prove connection stays up. With
+   * reads, all chunks trigger a callback even after failure, but the
+   * first errno sticks.
+   */
+  memset (rbuf, 2, sizeof rbuf);
+  data = (struct data) { .count = 2, .fail = true, };
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+                          0) != -1) {
+    fprintf (stderr, "unexpected pread callback success\n");
+    exit (EXIT_FAILURE);
+  }
+  assert (nbd_get_errno () == EPROTO && nbd_aio_is_ready (nbd));
+  assert (data.seen_data && data.seen_hole);
+
+  if (nbd_pread (nbd, &c, 1, 0, 0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+
+  if (nbd_shutdown (nbd) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+
+  nbd_close (nbd);
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/interop/structured-read.sh b/interop/structured-read.sh
new file mode 100755
index 0000000..15a81c0
--- /dev/null
+++ b/interop/structured-read.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# nbd client library in userspace
+# Copyright (C) 2019 Red Hat Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Test structured read callbacks.
+
+source ../tests/functions.sh
+set -e
+set -x
+
+requires qemu-img --version
+requires qemu-io --version
+requires qemu-nbd --version
+
+files="structured-read.sock structured-read.qcow2"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Create file with cluster size 512 and contents all 1 except for single
+# 512-byte hole at offset 2048
+qemu-img create -f qcow2 -o cluster_size=512,compat=v3 structured-read.qcow2 3k
+qemu-io -d unmap -f qcow2 -c 'w -P 1 0 3k' -c 'w -zu 2k 512' \
+	structured-read.qcow2
+
+qemu-nbd -k $PWD/structured-read.sock -f qcow2 structured-read.qcow2 &
+qemu_pid=$!
+cleanup_fn kill $qemu_pid
+
+# qemu-nbd --pid not available before 4.1, so ...
+for ((i = 0; i < 300; i++)); do
+  if [ -r $PWD/structured-read.sock ]; then
+    break
+  fi
+  kill -s 0 $qemu_pid 2>/dev/null
+  if test $? != 0; then
+    echo "qemu-nbd unexpectedly quit" 2>&1
+    exit 1
+  fi
+  sleep 0.1
+done
+
+# Run the test.
+$VG ./structured-read structured-read.sock
diff --git a/python/t/405-pread-callback.py b/python/t/405-pread-callback.py
new file mode 100644
index 0000000..7946dac
--- /dev/null
+++ b/python/t/405-pread-callback.py
@@ -0,0 +1,36 @@
+# libnbd Python bindings
+# Copyright (C) 2010-2019 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+import nbd
+
+h = nbd.NBD ()
+h.connect_command (["nbdkit", "-s",
"--exit-with-parent", "-v",
+                    "pattern", "size=512"])
+
+expected =
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00
\x00\x00\x00\x00\x00\x00\x00(\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x008\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00H\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00X\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00h\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00x\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xa8\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x00\xc8\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\xe0\x00\x00\x00\x00\x00\x00\x00\xe8\x00\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x08\x00\x00\x00\x00\x00\x00\x01\x10\x00\x00\x00\x00\x00\x00\x01\x18\x00\x00\x00\x00\x00\x00\x01
\x00\x00\x00\x00\x00\x00\x01(\x00\x00\x00\x00\x00\x00\x010\x00\x00\x00\x00\x00\x00\x018\x00\x00\x00\x00\x00\x00\x01@\x00\x00\x00\x00\x00\x00\x01H\x00\x00\x00\x00\x00\x00\x01P\x00\x00\x00\x00\x00\x00\x01X\x00\x00\x00\x00\x00\x00\x01`\x00\x00\x00\x00\x00\x00\x01h\x00\x00\x00\x00\x00\x00\x01p\x00\x00\x00\x00\x00\x00\x01x\x00\x00\x00\x00\x00\x00\x01\x80\x00\x00\x00\x00\x00\x00\x01\x88\x00\x00\x00\x00\x00\x00\x01\x90\x00\x00\x00\x00\x00\x00\x01\x98\x00\x00\x00\x00\x00\x00\x01\xa0\x00\x00\x00\x00\x00\x00\x01\xa8\x00\x00\x00\x00\x00\x00\x01\xb0\x00\x00\x00\x00\x00\x00\x01\xb8\x00\x00\x00\x00\x00\x00\x01\xc0\x00\x00\x00\x00\x00\x00\x01\xc8\x00\x00\x00\x00\x00\x00\x01\xd0\x00\x00\x00\x00\x00\x00\x01\xd8\x00\x00\x00\x00\x00\x00\x01\xe0\x00\x00\x00\x00\x00\x00\x01\xe8\x00\x00\x00\x00\x00\x00\x01\xf0\x00\x00\x00\x00\x00\x00\x01\xf8'
+
+def f (data, buf2, offset, s):
+    assert data == 42
+    assert buf2 == expected
+    assert offset == 0
+    assert s == nbd.READ_DATA
+
+buf = h.pread_callback (512, 0, 42, f)
+
+print ("%r" % buf)
+
+assert buf == expected
diff --git a/tests/oldstyle.c b/tests/oldstyle.c
index a0b594c..920136a 100644
--- a/tests/oldstyle.c
+++ b/tests/oldstyle.c
@@ -23,6 +23,7 @@
 #include <stdint.h>
 #include <inttypes.h>
 #include <string.h>
+#include <errno.h>
 #include <libnbd.h>
@@ -30,14 +31,52 @@
 #define XSTR(s) #s
 #define STR(s) XSTR(s)
+static char wbuf[512] = { 1, 2, 3, 4 }, rbuf[512];
+static const char *progname;
+
+static int
+pread_cb (void *data, const void *buf, size_t count, uint64_t offset,
+          int status)
+{
+  int *calls = data;
+  ++*calls;
+
+  if (buf != rbuf || count != sizeof rbuf) {
+    fprintf (stderr, "%s: callback called with wrong buffer\n",
progname);
+    exit (EXIT_FAILURE);
+  }
+  if (offset != 2 * sizeof rbuf) {
+    fprintf (stderr, "%s: callback called with wrong offset\n",
progname);
+    exit (EXIT_FAILURE);
+  }
+  if (status != LIBNBD_READ_DATA) {
+    fprintf (stderr, "%s: callback called with wrong status\n",
progname);
+    exit (EXIT_FAILURE);
+  }
+
+  if (memcmp (rbuf, wbuf, sizeof rbuf) != 0) {
+    fprintf (stderr, "%s: DATA INTEGRITY ERROR!\n", progname);
+    exit (EXIT_FAILURE);
+  }
+
+  if (*calls > 1) {
+    errno = EPROTO; /* Something NBD servers can't send */
+    return -1;
+  }
+
+  return 0;
+}
+
 int
 main (int argc, char *argv[])
 {
   struct nbd_handle *nbd;
-  char wbuf[512] = { 1, 2, 3, 4 }, rbuf[512];
   int64_t r;
   char *args[] = { "nbdkit", "-s", "-o",
"--exit-with-parent", "-v",
                    "memory", "size=" STR(SIZE), NULL };
+  int calls = 0;
+
+  progname = argv[0];
   nbd = nbd_create ();
   if (nbd == NULL) {
@@ -61,12 +100,13 @@ main (int argc, char *argv[])
     exit (EXIT_FAILURE);
   }
-  if (nbd_pwrite (nbd, wbuf, sizeof wbuf, 0, 0) == -1) {
+  /* Plain I/O */
+  if (nbd_pwrite (nbd, wbuf, sizeof wbuf, 2 * sizeof wbuf, 0) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
   }
-  if (nbd_pread (nbd, rbuf, sizeof rbuf, 0, 0) == -1) {
+  if (nbd_pread (nbd, rbuf, sizeof rbuf, 2 * sizeof rbuf, 0) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
   }
@@ -76,6 +116,34 @@ main (int argc, char *argv[])
     exit (EXIT_FAILURE);
   }
+  /* Test again for callback operation. */
+  memset (rbuf, 0, sizeof rbuf);
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2 * sizeof rbuf,
+                          &calls, pread_cb, 0) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+
+  if (calls != 1) {
+    fprintf (stderr, "%s: callback called wrong number of times\n",
argv[0]);
+    exit (EXIT_FAILURE);
+  }
+  if (memcmp (rbuf, wbuf, sizeof rbuf) != 0) {
+    fprintf (stderr, "%s: DATA INTEGRITY ERROR!\n", argv[0]);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Also test that callback errors are reflected correctly. */
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2 * sizeof rbuf,
+                          &calls, pread_cb, 0) != -1) {
+    fprintf (stderr, "%s: expected failure from callback\n",
argv[0]);
+    exit (EXIT_FAILURE);
+  }
+  if (nbd_get_errno () != EPROTO) {
+    fprintf (stderr, "%s: wrong errno value after failed callback\n",
argv[0]);
+    exit (EXIT_FAILURE);
+  }
+
   if (nbd_shutdown (nbd) == -1) {
     fprintf (stderr, "%s\n", nbd_get_error ());
     exit (EXIT_FAILURE);
-- 
2.20.1
Eric Blake
2019-Jun-18  00:07 UTC
[Libguestfs] [libnbd PATCH 8/8] states: Add DF flag support for pread
When structured replies are negotiated, the server may advertise
support for the DF flag (the server promises to return at most one
data/hole chunk, or to fail with NBD_EOVERFLOW if the chunk would be
too large). As both nbdkit and qemu-nbd support this flag (the former
only trivially, but the latter by not compressing holes over the
wire), it is worth exposing to clients, if only for testing purposes.
I chose to specifically reject the flag for plain nbd_[aio_]pread
(about all it can do is cause a read to fail that would otherwise
succeed) and only accept it for nbd_[aio_]pread_callback.
---
 generator/generator            | 22 ++++++++++++++++++----
 interop/structured-read.c      | 31 ++++++++++++++++++++++++-------
 lib/flags.c                    | 11 +++++++++++
 lib/nbd-protocol.h             |  1 +
 lib/protocol.c                 |  1 +
 lib/rw.c                       | 14 ++++++++++++--
 python/t/405-pread-callback.py |  6 ++++++
 7 files changed, 73 insertions(+), 13 deletions(-)
diff --git a/generator/generator b/generator/generator
index ce77f17..7a32570 100755
--- a/generator/generator
+++ b/generator/generator
@@ -1243,6 +1243,16 @@ Returns true if the server supports the zero command
 the server does not.";
   };
+  "can_df", {
+    default_call with
+    args = []; ret = RBool;
+    shortdesc = "does the server support the don't fragment flag to
pread?";
+    longdesc = "\
+Returns true if the server supports structured reads with an
+ability to request a non-fragmented read (see C<nbd_pread_callback>,
+C<nbd_aio_pread_callback>).  Returns false if the server does not.";
+  };
+
   "can_multi_conn", {
     default_call with
     args = []; ret = RBool;
@@ -1306,9 +1316,10 @@ at C<offset> and ending at C<offset> +
C<count> - 1.  NBD
 can only read all or nothing using this call.  The call
 returns when the data has been read fully into C<buf> or there is an
 error.  See also C<nbd_pread_callback>, if finer visibility is
-required into the server's replies.
+required into the server's replies, or if you want to use
+C<LIBNBD_CMD_FLAG_DF>.
-The C<flags> parameter must be C<0> for now (it exists for future
NBD
+The C<flags> parameter should be C<0> for now (it exists for future
NBD
 protocol extensions).";
   };
@@ -1372,8 +1383,10 @@ validate that the server obeyed the requirement that a
read call must
 not have overlapping chunks and must not succeed without enough chunks
 to cover the entire request.
-The C<flags> parameter must be C<0> for now (it exists for future
NBD
-protocol extensions).";
+The C<flags> parameter may be C<0> for no flags, or may contain
+C<LIBNBD_CMD_FLAG_DF> meaning that the server should not reply with
+more than one fragment (if that is supported - some servers cannot do
+this, see C<nbd_can_df>).";
   };
   "pwrite", {
@@ -2018,6 +2031,7 @@ let constants = [
   "CMD_FLAG_FUA",        1 lsl 0;
   "CMD_FLAG_NO_HOLE",    1 lsl 1;
+  "CMD_FLAG_DF",         1 lsl 2;
   "CMD_FLAG_REQ_ONE",    1 lsl 3;
   "READ_DATA",           1;
diff --git a/interop/structured-read.c b/interop/structured-read.c
index b740e98..0d87aa8 100644
--- a/interop/structured-read.c
+++ b/interop/structured-read.c
@@ -38,7 +38,7 @@ static const char *unixsocket;
 static char rbuf[1024];
 struct data {
-  //XXX  bool df;    /* input: true if DF flag was passed to request */
+  bool df;         /* input: true if DF flag was passed to request */
   int count;       /* input: count of expected remaining calls */
   bool fail;       /* input: true to return failure */
   bool seen_hole;  /* output: true if hole encountered */
@@ -59,15 +59,24 @@ read_cb (void *opaque, const void *bufv, size_t count,
uint64_t offset,
   switch (status) {
   case LIBNBD_READ_DATA:
-    // XXX if (df...)
-    assert (buf == rbuf + 512);
-    assert (count == 512);
-    assert (offset == 2048 + 512);
-    assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    if (data->df) {
+      assert (buf == rbuf);
+      assert (count == 1024);
+      assert (offset == 2048);
+      assert (buf[0] == 0 && memcmp (buf, buf + 1, 511) == 0);
+      assert (buf[512] == 1 && memcmp (buf + 512, buf + 513, 511) ==
0);
+    }
+    else {
+      assert (buf == rbuf + 512);
+      assert (count == 512);
+      assert (offset == 2048 + 512);
+      assert (buf[0] == 1 && memcmp (buf, buf + 1, 511) == 0);
+    }
     assert (!data->seen_data);
     data->seen_data = true;
     break;
   case LIBNBD_READ_HOLE:
+    assert (!data->df); /* Relies on qemu-nbd's behavior */
     assert (buf == rbuf);
     assert (count == 512);
     assert (offset == 2048);
@@ -133,7 +142,15 @@ main (int argc, char *argv[])
   }
   assert (data.seen_data && data.seen_hole);
-  // XXX Repeat with DF flag
+  /* Repeat with DF flag. */
+  memset (rbuf, 2, sizeof rbuf);
+  data = (struct data) { .df = true, .count = 1, };
+  if (nbd_pread_callback (nbd, rbuf, sizeof rbuf, 2048, &data, read_cb,
+                          LIBNBD_CMD_FLAG_DF) == -1) {
+    fprintf (stderr, "%s\n", nbd_get_error ());
+    exit (EXIT_FAILURE);
+  }
+  assert (data.seen_data && !data.seen_hole);
   /* Trigger a failed callback, to prove connection stays up. With
    * reads, all chunks trigger a callback even after failure, but the
diff --git a/lib/flags.c b/lib/flags.c
index 421a7d2..cdbc28f 100644
--- a/lib/flags.c
+++ b/lib/flags.c
@@ -42,6 +42,11 @@ nbd_internal_set_size_and_flags (struct nbd_handle *h,
     return -1;
   }
+  if (eflags & NBD_FLAG_SEND_DF && !h->structured_replies) {
+    debug (h, "server lacks structured replies, ignoring claim of
df");
+    eflags &= ~NBD_FLAG_SEND_DF;
+  }
+
   h->exportsize = exportsize;
   h->eflags = eflags;
   return 0;
@@ -95,6 +100,12 @@ nbd_unlocked_can_zero (struct nbd_handle *h)
   return get_flag (h, NBD_FLAG_SEND_WRITE_ZEROES);
 }
+int
+nbd_unlocked_can_df (struct nbd_handle *h)
+{
+  return get_flag (h, NBD_FLAG_SEND_DF);
+}
+
 int
 nbd_unlocked_can_multi_conn (struct nbd_handle *h)
 {
diff --git a/lib/nbd-protocol.h b/lib/nbd-protocol.h
index 071971e..405af3e 100644
--- a/lib/nbd-protocol.h
+++ b/lib/nbd-protocol.h
@@ -245,6 +245,7 @@ struct nbd_structured_reply_error {
 #define NBD_ENOMEM     12
 #define NBD_EINVAL     22
 #define NBD_ENOSPC     28
+#define NBD_EOVERFLOW  75
 #define NBD_ESHUTDOWN 108
 #endif /* NBD_PROTOCOL_H */
diff --git a/lib/protocol.c b/lib/protocol.c
index d3ac0b4..6087887 100644
--- a/lib/protocol.c
+++ b/lib/protocol.c
@@ -35,6 +35,7 @@ nbd_internal_errno_of_nbd_error (uint32_t error)
   case NBD_ENOMEM: return ENOMEM;
   case NBD_EINVAL: return EINVAL;
   case NBD_ENOSPC: return ENOSPC;
+  case NBD_EOVERFLOW: return EOVERFLOW;
   case NBD_ESHUTDOWN: return ESHUTDOWN;
   default: return EINVAL;
   }
diff --git a/lib/rw.c b/lib/rw.c
index 669987e..d30575b 100644
--- a/lib/rw.c
+++ b/lib/rw.c
@@ -238,6 +238,10 @@ int64_t
 nbd_unlocked_aio_pread (struct nbd_handle *h, void *buf,
                         size_t count, uint64_t offset, uint32_t flags)
 {
+  /* We could silently accept flag DF, but it really only makes sense
+   * with callbacks, because otherwise there is no observable change
+   * except that the server may fail where it would otherwise succeed.
+   */
   if (flags != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
@@ -254,12 +258,18 @@ nbd_unlocked_aio_pread_callback (struct nbd_handle *h,
void *buf,
 {
   struct command_cb cb = { .opaque = opaque, .fn.read = read, };
-  if (flags != 0) {
+  if ((flags & ~LIBNBD_CMD_FLAG_DF) != 0) {
     set_error (EINVAL, "invalid flag: %" PRIu32, flags);
     return -1;
   }
-  return nbd_internal_command_common (h, 0, NBD_CMD_READ, offset, count,
+  if ((flags & LIBNBD_CMD_FLAG_DF) != 0 &&
+      nbd_unlocked_can_df (h) != 1) {
+    set_error (EINVAL, "server does not support the DF flag");
+    return -1;
+  }
+
+  return nbd_internal_command_common (h, flags, NBD_CMD_READ, offset, count,
                                       buf, &cb);
 }
diff --git a/python/t/405-pread-callback.py b/python/t/405-pread-callback.py
index 7946dac..af90765 100644
--- a/python/t/405-pread-callback.py
+++ b/python/t/405-pread-callback.py
@@ -34,3 +34,9 @@ buf = h.pread_callback (512, 0, 42, f)
 print ("%r" % buf)
 assert buf == expected
+
+buf = h.pread_callback (512, 0, 42, f, nbd.CMD_FLAG_DF)
+
+print ("%r" % buf)
+
+assert buf == expected
-- 
2.20.1
Eric Blake
2019-Jun-18  01:39 UTC
[Libguestfs] [libnbd RFC PATCH 9/8] wip: generator: Add ReadStatus enum type
Exploration of what it would take to treat the nbd_pread_callback
status parameter as a language-appropriate enum rather than a bare
integer.  While I've identified a rough set of the places to change, I
have not actually got it working with Python or OCaml bindings.
---
Posting this more for an idea of whether we want to pursue it
further. I have no hard feelings if we decide to ditch it; what's
more, compilers don't necessarily guarantee ABI stability with enum
types in public headers, so sticking to Int may be better after all.
 generator/generator | 46 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/generator/generator b/generator/generator
index 630260b..d269a04 100755
--- a/generator/generator
+++ b/generator/generator
@@ -845,6 +845,7 @@ and arg  | Int64 of string          (* 64 bit signed int *)
 | Opaque of string         (* opaque object, void* in C *)
 | Path of string           (* filename or path *)
+| ReadStatus of string     (* enum of pread_callback status *)
 | SockAddrAndLen of string * string (* struct sockaddr * + socklen_t *)
 | String of string         (* string *)
 | StringList of string     (* argv-style NULL-terminated array of strings *)
@@ -2033,10 +2034,13 @@ let constants = [
   "CMD_FLAG_NO_HOLE",    1 lsl 1;
   "CMD_FLAG_DF",         1 lsl 2;
   "CMD_FLAG_REQ_ONE",    1 lsl 3;
-
-  "READ_DATA",           1;
-  "READ_HOLE",           2;
-  "READ_ERROR",          3;
+]
+let enums = [
+  "nbd_read_status", [
+    "READ_DATA",           1;
+    "READ_HOLE",           2;
+    "READ_ERROR",          3;
+    ];
 ]
 (*----------------------------------------------------------------------*)
@@ -2812,6 +2816,7 @@ let rec name_of_arg = function
 | Int64 n -> [n]
 | Opaque n -> [n]
 | Path n -> [n]
+| ReadStatus n -> [n]
 | SockAddrAndLen (n, len) -> [n; len]
 | String n -> [n]
 | StringList n -> [n]
@@ -2845,6 +2850,7 @@ let rec print_c_arg_list ?(handle = false) args        |
Int n -> pr "int %s" n
       | Int64 n -> pr "int64_t %s" n
       | Opaque n -> pr "void *%s" n
+      | ReadStatus n -> pr "enum nbd_read_status %s" n
       | Path n
       | String n -> pr "const char *%s" n
       | StringList n -> pr "char **%s" n
@@ -2896,6 +2902,13 @@ let generate_include_libnbd_h ()    pr "\n";
   List.iter (fun (n, i) -> pr "#define LIBNBD_%-30s %d\n" n i)
constants;
   pr "\n";
+  List.iter (
+    fun (name, list) ->
+      pr "enum %s {\n" name;
+      List.iter (fun (n, i) -> pr "  LIBNBD_%-30s = %d,\n" n i)
list;
+      pr "};\n"
+    ) enums;
+  pr "\n";
   pr "extern struct nbd_handle *nbd_create (void);\n";
   pr "#define LIBNBD_HAVE_NBD_CREATE 1\n";
   pr "\n";
@@ -3367,6 +3380,7 @@ let print_python_binding name { args; ret }           |
Int _ -> ()
          | Opaque n ->
             pr "  struct %s_%s_data *_data = %s;\n" name cb_name n
+         | ReadStatus n
          | String n
          | UInt64 n -> ()
          (* The following not yet implemented for callbacks XXX *)
@@ -3384,6 +3398,7 @@ let print_python_binding name { args; ret }           |
BytesIn (n, len) -> pr " \"y#\""
          | Int n -> pr " \"i\""
          | Opaque n -> pr " \"O\""
+         | ReadStatus n -> pr " XXX1"
          | String n -> pr " \"s\""
          | UInt64 n -> pr " \"K\""
          (* The following not yet implemented for callbacks XXX *)
@@ -3398,6 +3413,7 @@ let print_python_binding name { args; ret }           |
ArrayAndLen (UInt32 n, _) -> pr ", py_%s" n
          | BytesIn (n, len) -> pr ", %s, (int) %s" n len
          | Opaque _ -> pr ", _data->data"
+         | ReadStatus n -> pr ", XXX2 %s" n
          | Int n
          | String n
          | UInt64 n -> pr ", %s" n
@@ -3436,6 +3452,7 @@ let print_python_binding name { args; ret }           |
BytesIn _
          | Int _
          | Opaque _
+         | ReadStatus _
          | String _
          | UInt64 _ -> ()
          (* The following not yet implemented for callbacks XXX *)
@@ -3497,6 +3514,7 @@ let print_python_binding name { args; ret }         pr
"  long long %s; /* really int64_t */\n" n
     | Opaque _ -> ()
     | Path n -> pr "  char *%s = NULL;\n" n
+    | ReadStatus n -> pr " XXX3 %s\n" n
     | SockAddrAndLen (n, _) ->
        pr "  /* XXX Complicated - Python uses a tuple of
different\n";
        pr "   * lengths for the different socket types.\n";
@@ -3537,6 +3555,7 @@ let print_python_binding name { args; ret }      | Int64 _
     | Opaque _
     | Path _
+    | ReadStatus _
     | SockAddrAndLen _
     | String _
     | StringList _
@@ -3561,6 +3580,7 @@ let print_python_binding name { args; ret }      | Int64 n
-> pr " \"L\""
     | Opaque _ -> pr " \"O\""
     | Path n -> pr " \"O&\""
+    | ReadStatus n -> pr " XXX4"
     | SockAddrAndLen (n, _) -> pr " \"O\""
     | String n -> pr " \"s\""
     | StringList n -> pr " \"O\""
@@ -3584,6 +3604,7 @@ let print_python_binding name { args; ret }      | Int n
-> pr ", &%s" n
     | Int64 n -> pr ", &%s" n
     | Opaque n -> pr ", &%s_data->data" (find_callback n)
+    | ReadStatus n -> pr ", XXX5 &%s" n
     | Path n -> pr ", PyUnicode_FSConverter, &%s" n
     | SockAddrAndLen (n, _) -> pr ", &%s" n
     | String n -> pr ", &%s" n
@@ -3634,6 +3655,7 @@ let print_python_binding name { args; ret }      | Int64 n
-> pr "  %s_i64 = %s;\n" n n
     | Opaque n -> ()
     | Path _ -> ()
+    | ReadStatus n -> pr " XXX6 %s\n" n
     | SockAddrAndLen _ ->
        pr "  abort (); /* XXX SockAddrAndLen not implemented */\n";
     | String _ -> ()
@@ -3662,6 +3684,7 @@ let print_python_binding name { args; ret }      | Int64 n
-> pr ", %s_i64" n
     | Opaque n -> pr ", %s_data" (find_callback n)
     | Path n -> pr ", %s" n
+    | ReadStatus n -> pr ", XXX8 %s" n
     | SockAddrAndLen (n, _) -> pr ", /* XXX */ (void *) %s, 0" n
     | String n -> pr ", %s" n
     | StringList n -> pr ", %s" n
@@ -3696,6 +3719,7 @@ let print_python_binding name { args; ret }      | Int64 _
     | Opaque _
     | Path _
+    | ReadStatus _
     | SockAddrAndLen _
     | String _
     | StringList _
@@ -3741,6 +3765,7 @@ let print_python_binding name { args; ret }      | Int64 _
-> ()
     | Opaque _ -> ()
     | Path n -> pr "  free (%s);\n" n
+    | ReadStatus _ -> ()
     | SockAddrAndLen _ -> ()
     | String n -> ()
     | StringList n -> pr "  nbd_internal_py_free_string_list
(%s);\n" n
@@ -3795,6 +3820,7 @@ import libnbdmod
 ";
   List.iter (fun (n, i) -> pr "%-30s = %d\n" n i) constants;
+  (* how to represent enums? *)
   pr "\
@@ -3839,6 +3865,7 @@ class NBD (object):
                      | Int64 n -> n, None
                      | Opaque n -> n, None
                      | Path n -> n, None
+                     | ReadStatus n -> n, None
                      | SockAddrAndLen (n, _) -> n, None
                      | String n -> n, None
                      | StringList n -> n, None
@@ -3929,6 +3956,7 @@ and ocaml_arg_to_string = function
   | OCamlArg (Int64 _) -> "int64"
   | OCamlArg (Opaque n) -> sprintf "'%s" n
   | OCamlArg (Path _) -> "string"
+  | OCamlArg (ReadStatus _) -> "int"
   | OCamlArg (SockAddrAndLen _) -> "string" (* XXX not impl *)
   | OCamlArg (String _) -> "string"
   | OCamlArg (StringList _) -> "string list"
@@ -3978,6 +4006,7 @@ exception Closed of string
   List.iter (
     fun (n, _) -> pr "val %s : int32\n" (String.lowercase_ascii n)
   ) constants;
+  (* how to represent enums? *)
   pr "\n";
   pr "\
@@ -4040,6 +4069,7 @@ let ()    List.iter (
     fun (n, i) -> pr "let %s = %d_l\n" (String.lowercase_ascii n)
i
   ) constants;
+  (* how to represent enums? *)
   pr "\n";
   pr "\
@@ -4092,7 +4122,7 @@ let print_ocaml_binding (name, { args; ret })          
List.map (
            function
            | ArrayAndLen (UInt32 n, _) | BytesIn (n, _)
-           | Int n | Opaque n | String n | UInt64 n ->
+           | Int n | Opaque n | ReadStatus n | String n | UInt64 n ->
               n ^ "v"
            (* The following not yet implemented for callbacks XXX *)
            | ArrayAndLen _ | Bool _ | BytesOut _
@@ -4124,7 +4154,8 @@ let print_ocaml_binding (name, { args; ret })           |
BytesIn (n, len) ->
             pr "  %sv = caml_alloc_string (%s);\n" n len;
             pr "  memcpy (String_val (%sv), %s, %s);\n" n n len
-         | Int n ->
+         | Int n
+         | ReadStatus n ->
             pr "  %sv = caml_copy_int32 (%s);\n" n n
          | String n ->
             pr "  %sv = caml_copy_string (%s);\n" n n
@@ -4282,6 +4313,8 @@ let print_ocaml_binding (name, { args; ret })         )
     | OCamlArg (Path n) | OCamlArg (String n) ->
        pr "  const char *%s = String_val (%sv);\n" n n
+    | OCamlArg (ReadStatus n) ->
+       pr "  unsigned %s = Int_val (%sv);\n" n n
     | OCamlArg (SockAddrAndLen (n, len)) ->
        pr "  const struct sockaddr *%s;\n" n;
        pr "  socklen_t %s;\n" len;
@@ -4363,6 +4396,7 @@ let print_ocaml_binding (name, { args; ret })      |
OCamlArg (Int64 _)
     | OCamlArg (Opaque _)
     | OCamlArg (Path _)
+    | OCamlArg (ReadStatus _)
     | OCamlArg (String _)
     | OCamlArg (SockAddrAndLen _)
     | OCamlArg (UInt _)
-- 
2.20.1
Eric Blake
2019-Jun-18  03:03 UTC
Re: [Libguestfs] [libnbd PATCH 0/8] Add nbd_pread_callback
On 6/17/19 7:07 PM, Eric Blake wrote:> I've mentioned this topic before (in fact, the idea of adding > NBD_CMD_FLAG_DF was first mentioned at [1]), but finally finished > enough of an implementation to feel confident in posting it. > > I'd still like to add something under examples/ that uses the new API > to implement strict checking of a server's structured replies read > implementation (ensure that a server never sends data after an error > at the same offset, that it does not send overlapping data, and that > it does not report success unless all of the range is covered).For wrapping the synchronous API, this seems simple enough: int wrapped_nbd_pread(struct nbd_handle *h, void *buf, size_t count, uint64_t offset, uint32_t flags) { int r; struct data *data = prep (buf, count, offset, flags); r = nbd_pread_callback (h, buf, count, offset, data, check, flags); return cleanup (data, r); } where prep() mallocs an initial structure to record the entire range of [buf, buf+count) as not yet seen (probably by tracking a sorted linked list of consecutive ranges initially with a single list element), each call to the check() callback updates the list (as long as the callback covers only bytes that were previously unseen, the list is adjusted to remove those bytes, possible by adding or coalescing elements of the linked list if replies come out of order; if the callback includes any range already seen, then it returns -1), then cleanup() checks that the linked list was properly emptied as a final chance to convert success into an error if the server was buggy before cleaning up resources. But for wrapping asynchronous API, I'm wishing that we had a way to associate an arbitrary callback function to be called at the conclusion of any given command. The existing nbd_aio_command_completed() does not provide an easy hook for that. We could either have a single one-shot registration that gets called for all completions (or all completions of a given command type) by storing the callback in struct nbd_handle, as in: nbd_set_read_completion_hook (struct nbd_handle *h, void *opaque, int (*hook) (void *opaque, int64_t handle, int status)); Register a completion hook callback to be called on every command, right before the command will be available in nbd_aio_command_completed(). The hook is passed the command's handle obtained from nbd_aio_pread or similar, as well the status of the command so far (0 if successful, or a positive errno value); if the callback returns -1, the command will fail even if status had been 0. Or we can make the hook a per-handle choice, by adding counterpart aio APIs and storing the callback in struct command_in_flight: nbd_aio_pread_with_notify (struct nbd_handle *h, void *buf, size_t count, uint64_t offset, void *opaque, int (*notify) (void *opaque, int64_t handle, int status), uint32_t flags) where notify() is called for that specific command just before it completes. There's a small question of what to do with APIs that already take a callback (think nbd_aio_pread_callback - do we allow the user two separate opaque arguments, one for the per-chunk callback and one for the final cleanup callback, or do we require the user to tolerate the same callback in both calls?) If we really wanted, we could even copy POSIX aio and take a struct sigevent* parameter with each aio command (the existing nbd_aio_pread would be shorthand for calling a new nbd_aio_pread_event where struct sigevent is initialized to SIGEV_NONE). But that starts to feel a bit complex (SIGEV_SIGNAL may be better if we just tell the user to call pthread_signal from their own function callback). Thoughts? -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Richard W.M. Jones
2019-Jun-20  05:44 UTC
Re: [Libguestfs] [libnbd PATCH 1/8] states: Add state for structured reply completion
On Mon, Jun 17, 2019 at 07:07:51PM -0500, Eric Blake wrote:> Rather than repeating the logic for checking flags at multiple sites, > stick it in a new state.This is an obvious refactoring, ACK. Rich.> generator/generator | 7 +++++ > generator/states-reply-structured.c | 43 ++++++++++------------------- > 2 files changed, 22 insertions(+), 28 deletions(-) > > diff --git a/generator/generator b/generator/generator > index a289741..4c81859 100755 > --- a/generator/generator > +++ b/generator/generator > @@ -798,6 +798,13 @@ and structured_reply_state_machine = [ > comment = "Receive a structured reply block-status payload"; > external_events = [ NotifyRead, "" ]; > }; > + > + State { > + default_state with > + name = "FINISH"; > + comment = "Finish receiving a structured reply"; > + external_events = []; > + }; > ] > > (*----------------------------------------------------------------------*) > diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c > index 6337dad..2125e41 100644 > --- a/generator/states-reply-structured.c > +++ b/generator/states-reply-structured.c > @@ -103,7 +103,7 @@ > set_error (0, "NBD_REPLY_FLAG_DONE must be set in NBD_REPLY_TYPE_NONE"); > return -1; > } > - SET_NEXT_STATE (%^FINISH_COMMAND); > + SET_NEXT_STATE (%FINISH); > return 0; > } > else if (type == NBD_REPLY_TYPE_OFFSET_DATA) { > @@ -225,7 +225,6 @@ > > REPLY.STRUCTURED_REPLY.RECV_ERROR_TAIL: > struct command_in_flight *cmd; > - uint16_t flags; > uint64_t handle; > uint32_t error; > uint64_t offset; > @@ -234,7 +233,6 @@ > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > - flags = be16toh (h->sbuf.sr.structured_reply.flags); > handle = be64toh (h->sbuf.sr.structured_reply.handle); > error = be32toh (h->sbuf.sr.payload.error.error.error); > type = be16toh (h->sbuf.sr.structured_reply.type); > @@ -264,10 +262,7 @@ > if (cmd->error == 0) > cmd->error = nbd_internal_errno_of_nbd_error (error); > > - if (flags & NBD_REPLY_FLAG_DONE) > - SET_NEXT_STATE (%^FINISH_COMMAND); > - else > - SET_NEXT_STATE (%.READY); > + SET_NEXT_STATE(%FINISH); > } > return 0; > > @@ -334,24 +329,15 @@ > return 0; > > REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA_DATA: > - uint16_t flags; > - > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > - case 0: > - flags = be16toh (h->sbuf.sr.structured_reply.flags); > - > - if (flags & NBD_REPLY_FLAG_DONE) > - SET_NEXT_STATE (%^FINISH_COMMAND); > - else > - SET_NEXT_STATE (%.READY); > + case 0: SET_NEXT_STATE (%FINISH); > } > return 0; > > REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE: > struct command_in_flight *cmd; > uint64_t handle; > - uint16_t flags; > uint64_t offset; > uint32_t length; > > @@ -359,7 +345,6 @@ > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > handle = be64toh (h->sbuf.sr.structured_reply.handle); > - flags = be16toh (h->sbuf.sr.structured_reply.flags); > offset = be64toh (h->sbuf.sr.payload.offset_hole.offset); > length = be32toh (h->sbuf.sr.payload.offset_hole.length); > > @@ -404,17 +389,13 @@ > > memset (cmd->data + offset, 0, length); > > - if (flags & NBD_REPLY_FLAG_DONE) > - SET_NEXT_STATE (%^FINISH_COMMAND); > - else > - SET_NEXT_STATE (%.READY); > + SET_NEXT_STATE(%FINISH); > } > return 0; > > REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES: > struct command_in_flight *cmd; > uint64_t handle; > - uint16_t flags; > uint32_t length; > size_t i; > uint32_t context_id; > @@ -424,7 +405,6 @@ > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > handle = be64toh (h->sbuf.sr.structured_reply.handle); > - flags = be16toh (h->sbuf.sr.structured_reply.flags); > length = be32toh (h->sbuf.sr.structured_reply.length); > > /* Find the command amongst the commands in flight. */ > @@ -470,11 +450,18 @@ > context_id); > } > > - if (flags & NBD_REPLY_FLAG_DONE) > - SET_NEXT_STATE (%^FINISH_COMMAND); > - else > - SET_NEXT_STATE (%.READY); > + SET_NEXT_STATE(%FINISH); > } > return 0; > > + REPLY.STRUCTURED_REPLY.FINISH: > + uint16_t flags; > + > + flags = be16toh (h->sbuf.sr.structured_reply.flags); > + if (flags & NBD_REPLY_FLAG_DONE) > + SET_NEXT_STATE (%^FINISH_COMMAND); > + else > + SET_NEXT_STATE (%.READY); > + return 0; > + > } /* END STATE MACHINE */ > -- > 2.20.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://people.redhat.com/~rjones/virt-df/
Richard W.M. Jones
2019-Jun-20  05:46 UTC
Re: [Libguestfs] [libnbd PATCH 2/8] states: Consolidate search for current reply's command
On Mon, Jun 17, 2019 at 07:07:52PM -0500, Eric Blake wrote:> No need to have each state recompute which reply is current. This also > consolidates the logic when a reply has an unexpected handle - > previously, we failed for structured (even though the length makes it > easy to recover) and passed for simple (even though there is nothing > on the wire to state if this response is associated with NBD_CMD_READ > if we did not negotiate structured replies, which would allow our > state machine to start parsing arbitrary data as new responses); we're > better off just always moving to DEAD.Pretty simple and obvious optimization, ACK. Rich.> generator/states-reply-simple.c | 15 ++----- > generator/states-reply-structured.c | 61 ++++++----------------------- > generator/states-reply.c | 31 ++++++++++++++- > lib/internal.h | 2 + > 4 files changed, 45 insertions(+), 64 deletions(-) > > diff --git a/generator/states-reply-simple.c b/generator/states-reply-simple.c > index 7e5340c..12536e0 100644 > --- a/generator/states-reply-simple.c > +++ b/generator/states-reply-simple.c > @@ -20,24 +20,15 @@ > > /* STATE MACHINE */ { > REPLY.SIMPLE_REPLY.START: > - struct command_in_flight *cmd; > + struct command_in_flight *cmd = h->reply_cmd; > uint32_t error; > uint64_t handle; > > error = be32toh (h->sbuf.simple_reply.error); > handle = be64toh (h->sbuf.simple_reply.handle); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > - if (cmd == NULL) { > - SET_NEXT_STATE (%.READY); > - set_error (0, "no matching handle found for server reply, " > - "this is probably a bug in the server"); > - return -1; > - } > + assert (cmd); > + assert (cmd->handle == handle); > > if (cmd->type == NBD_CMD_READ && h->structured_replies) { > set_error (0, "server sent unexpected simple reply for read"); > diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c > index 2125e41..9bb165b 100644 > --- a/generator/states-reply-structured.c > +++ b/generator/states-reply-structured.c > @@ -43,7 +43,7 @@ > return 0; > > REPLY.STRUCTURED_REPLY.CHECK: > - struct command_in_flight *cmd; > + struct command_in_flight *cmd = h->reply_cmd; > uint16_t flags, type; > uint64_t handle; > uint32_t length; > @@ -53,20 +53,8 @@ > handle = be64toh (h->sbuf.sr.structured_reply.handle); > length = be32toh (h->sbuf.sr.structured_reply.length); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > - if (cmd == NULL) { > - /* Unlike for simple replies, this is difficult to recover from. We > - * would need an extra state to read and ignore length bytes. XXX > - */ > - SET_NEXT_STATE (%.DEAD); > - set_error (0, "no matching handle found for server reply, " > - "this is probably a bug in the server"); > - return -1; > - } > + assert (cmd); > + assert (cmd->handle == handle); > > /* Reject a server that replies with too much information, but don't > * reject a single structured reply to NBD_CMD_READ on the largest > @@ -224,8 +212,7 @@ > return 0; > > REPLY.STRUCTURED_REPLY.RECV_ERROR_TAIL: > - struct command_in_flight *cmd; > - uint64_t handle; > + struct command_in_flight *cmd = h->reply_cmd; > uint32_t error; > uint64_t offset; > uint16_t type; > @@ -233,15 +220,9 @@ > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > - handle = be64toh (h->sbuf.sr.structured_reply.handle); > error = be32toh (h->sbuf.sr.payload.error.error.error); > type = be16toh (h->sbuf.sr.structured_reply.type); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > assert (cmd); /* guaranteed by CHECK */ > > /* Sanity check that any error offset is in range */ > @@ -267,23 +248,16 @@ > return 0; > > REPLY.STRUCTURED_REPLY.RECV_OFFSET_DATA: > - struct command_in_flight *cmd; > - uint64_t handle; > + struct command_in_flight *cmd = h->reply_cmd; > uint64_t offset; > uint32_t length; > > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > - handle = be64toh (h->sbuf.sr.structured_reply.handle); > length = be32toh (h->sbuf.sr.structured_reply.length); > offset = be64toh (h->sbuf.sr.payload.offset_data.offset); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > assert (cmd); /* guaranteed by CHECK */ > > if (cmd->type != NBD_CMD_READ) { > @@ -336,23 +310,16 @@ > return 0; > > REPLY.STRUCTURED_REPLY.RECV_OFFSET_HOLE: > - struct command_in_flight *cmd; > - uint64_t handle; > + struct command_in_flight *cmd = h->reply_cmd; > uint64_t offset; > uint32_t length; > > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > - handle = be64toh (h->sbuf.sr.structured_reply.handle); > offset = be64toh (h->sbuf.sr.payload.offset_hole.offset); > length = be32toh (h->sbuf.sr.payload.offset_hole.length); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > assert (cmd); /* guaranteed by CHECK */ > > if (cmd->type != NBD_CMD_READ) { > @@ -394,8 +361,7 @@ > return 0; > > REPLY.STRUCTURED_REPLY.RECV_BS_ENTRIES: > - struct command_in_flight *cmd; > - uint64_t handle; > + struct command_in_flight *cmd = h->reply_cmd; > uint32_t length; > size_t i; > uint32_t context_id; > @@ -404,16 +370,9 @@ > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return -1; > case 0: > - handle = be64toh (h->sbuf.sr.structured_reply.handle); > length = be32toh (h->sbuf.sr.structured_reply.length); > > - /* Find the command amongst the commands in flight. */ > - for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > - if (cmd->handle == handle) > - break; > - } > - /* guaranteed by CHECK */ > - assert (cmd); > + assert (cmd); /* guaranteed by CHECK */ > assert (cmd->extent_fn); > assert (h->bs_entries); > assert (length >= 12); > @@ -460,8 +419,10 @@ > flags = be16toh (h->sbuf.sr.structured_reply.flags); > if (flags & NBD_REPLY_FLAG_DONE) > SET_NEXT_STATE (%^FINISH_COMMAND); > - else > + else { > + h->reply_cmd = NULL; > SET_NEXT_STATE (%.READY); > + } > return 0; > > } /* END STATE MACHINE */ > diff --git a/generator/states-reply.c b/generator/states-reply.c > index f0ef47c..54f98c5 100644 > --- a/generator/states-reply.c > +++ b/generator/states-reply.c > @@ -36,6 +36,8 @@ > h->rbuf = &h->sbuf; > h->rlen = sizeof h->sbuf.simple_reply; > > + assert (h->reply_cmd == NULL); > + > r = h->sock->ops->recv (h, h->sock, h->rbuf, h->rlen); > if (r == -1) { > /* This should never happen because when we enter this state we > @@ -69,16 +71,16 @@ > return 0; > > REPLY.CHECK_SIMPLE_OR_STRUCTURED_REPLY: > + struct command_in_flight *cmd; > uint32_t magic; > + uint64_t handle; > > magic = be32toh (h->sbuf.simple_reply.magic); > if (magic == NBD_SIMPLE_REPLY_MAGIC) { > SET_NEXT_STATE (%SIMPLE_REPLY.START); > - return 0; > } > else if (magic == NBD_STRUCTURED_REPLY_MAGIC) { > SET_NEXT_STATE (%STRUCTURED_REPLY.START); > - return 0; > } > else { > SET_NEXT_STATE (%.DEAD); /* We've probably lost synchronization. */ > @@ -86,6 +88,29 @@ > return -1; > } > > + /* NB: This works for both simple and structured replies because the > + * handle is stored at the same offset. > + */ > + handle = be64toh (h->sbuf.simple_reply.handle); > + /* Find the command amongst the commands in flight. */ > + for (cmd = h->cmds_in_flight; cmd != NULL; cmd = cmd->next) { > + if (cmd->handle == handle) > + break; > + } > + if (cmd == NULL) { > + /* An unexpected structured reply could be skipped, since it > + * includes a length; similarly an unexpected simple reply can be > + * skipped if we assume it was not a read. However, it's more > + * likely we've lost synchronization with the server. > + */ > + SET_NEXT_STATE (%.DEAD); > + set_error (0, "no matching handle found for server reply, " > + "this is probably a bug in the server"); > + return -1; > + } > + h->reply_cmd = cmd; > + return 0; > + > REPLY.FINISH_COMMAND: > struct command_in_flight *prev_cmd, *cmd; > uint64_t handle; > @@ -102,6 +127,8 @@ > break; > } > assert (cmd != NULL); > + assert (h->reply_cmd == cmd); > + h->reply_cmd = NULL; > > /* Move it to the end of the cmds_done list. */ > if (prev_cmd != NULL) > diff --git a/lib/internal.h b/lib/internal.h > index 9404d65..6fde06c 100644 > --- a/lib/internal.h > +++ b/lib/internal.h > @@ -189,6 +189,8 @@ struct nbd_handle { > * acknowledge them. > */ > struct command_in_flight *cmds_to_issue, *cmds_in_flight, *cmds_done; > + /* Current command during a REPLY cycle */ > + struct command_in_flight *reply_cmd; > }; > > struct meta_context { > -- > 2.20.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 100 libraries supported. http://fedoraproject.org/wiki/MinGW
Richard W.M. Jones
2019-Jun-20  05:49 UTC
Re: [Libguestfs] [libnbd PATCH 3/8] pread: Reject server SR read response with no data chunks
On Mon, Jun 17, 2019 at 07:07:53PM -0500, Eric Blake wrote:> The NBD spec requires that a server doing structured reads must not > succeed unless it covers the entire buffer with reply chunks. In the > general case, this requires a lot of bookkeeping to check whether > offsets were non-overlapping and sufficient, and we'd rather defer > such checking to an optional callback function. But a callback > function will only be reached per chunk, while we still want to fail > the overall read if the callback function was never called because the > server erroneously replied with NBD_REPLY_TYPE_NONE with no other > chunks instead of an expected NBD_REPLY_TYPE_ERROR*. For this specific > error case, the bookkeeping is much simpler - we merely track if we've > seen at least one data chunk.I guess the implicit assumption here is that count > 0, which IIRC the NBD protocol demands. It seems like we don't actually check this in nbd_internal_command_common so would it be worth adding a check there? In any case this patch on its own looks good, so ACK. Rich.> generator/states-reply-simple.c | 1 + > generator/states-reply-structured.c | 2 ++ > lib/aio.c | 2 ++ > lib/internal.h | 1 + > 4 files changed, 6 insertions(+) > > diff --git a/generator/states-reply-simple.c b/generator/states-reply-simple.c > index 12536e0..935f6d2 100644 > --- a/generator/states-reply-simple.c > +++ b/generator/states-reply-simple.c > @@ -40,6 +40,7 @@ > if (cmd->error == 0 && cmd->type == NBD_CMD_READ) { > h->rbuf = cmd->data; > h->rlen = cmd->count; > + cmd->data_seen = true; > SET_NEXT_STATE (%RECV_READ_PAYLOAD); > } > else { > diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c > index 9bb165b..6740400 100644 > --- a/generator/states-reply-structured.c > +++ b/generator/states-reply-structured.c > @@ -269,6 +269,7 @@ > return -1; > } > assert (cmd->data); > + cmd->data_seen = true; > > /* Length of the data following. */ > length -= 8; > @@ -331,6 +332,7 @@ > return -1; > } > assert (cmd->data); > + cmd->data_seen = true; > > /* Is the data within bounds? */ > if (offset < cmd->offset) { > diff --git a/lib/aio.c b/lib/aio.c > index 52e8892..7fb0fdf 100644 > --- a/lib/aio.c > +++ b/lib/aio.c > @@ -73,6 +73,8 @@ nbd_unlocked_aio_command_completed (struct nbd_handle *h, > > type = cmd->type; > error = cmd->error; > + if (type == NBD_CMD_READ && !cmd->data_seen && !error) > + error = EIO; > > /* Retire it from the list and free it. */ > if (prev_cmd != NULL) > diff --git a/lib/internal.h b/lib/internal.h > index 6fde06c..1f8f789 100644 > --- a/lib/internal.h > +++ b/lib/internal.h > @@ -242,6 +242,7 @@ struct command_in_flight { > uint32_t count; > void *data; /* Buffer for read/write, opaque for block status */ > extent_fn extent_fn; > + bool data_seen; /* For read, true if at least one data chunk seen */ > uint32_t error; /* Local errno value */ > }; > > -- > 2.20.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into KVM guests. http://libguestfs.org/virt-v2v
Richard W.M. Jones
2019-Jun-20  08:29 UTC
Re: [Libguestfs] [libnbd PATCH 4/8] states: Prepare for read callback
On Mon, Jun 17, 2019 at 07:07:54PM -0500, Eric Blake wrote:> The next patch will add a new 'nbd_aio_pread_callback' function for > allowing the user more immediate access as each chunk of a structured > reply read is received. But before we do that, let's refactor the > command code. This includes a revert of 12843a1a, since the read > callback will need both a buffer and the user's opaque object at the > same time.This patch on its own is fine, although of course if you want to push it upstream now you'll need to adjust the commit message. ACK Rich.> generator/states-reply-structured.c | 8 ++++---- > lib/internal.h | 17 +++++++++++++---- > lib/rw.c | 9 ++++++--- > 3 files changed, 23 insertions(+), 11 deletions(-) > > diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c > index 6740400..657106e 100644 > --- a/generator/states-reply-structured.c > +++ b/generator/states-reply-structured.c > @@ -123,7 +123,7 @@ > set_error (0, "invalid length in NBD_REPLY_TYPE_BLOCK_STATUS"); > return -1; > } > - if (cmd->extent_fn == NULL) { > + if (cmd->cb.fn.extent == NULL) { > SET_NEXT_STATE (%.DEAD); > set_error (0, "not expecting NBD_REPLY_TYPE_BLOCK_STATUS here"); > return -1; > @@ -375,7 +375,7 @@ > length = be32toh (h->sbuf.sr.structured_reply.length); > > assert (cmd); /* guaranteed by CHECK */ > - assert (cmd->extent_fn); > + assert (cmd->cb.fn.extent); > assert (h->bs_entries); > assert (length >= 12); > > @@ -401,8 +401,8 @@ > if (meta_context) { > /* Call the caller's extent function. */ > errno = 0; > - if (cmd->extent_fn (cmd->data, meta_context->name, cmd->offset, > - &h->bs_entries[1], (length-4) / 4) == -1) > + if (cmd->cb.fn.extent (cmd->cb.opaque, meta_context->name, cmd->offset, > + &h->bs_entries[1], (length-4) / 4) == -1) > cmd->error = errno ? errno : EPROTO; > } > else > diff --git a/lib/internal.h b/lib/internal.h > index 1f8f789..cb0e170 100644 > --- a/lib/internal.h > +++ b/lib/internal.h > @@ -231,7 +231,16 @@ struct socket { > const struct socket_ops *ops; > }; > > -typedef int (*extent_fn) (void *data, const char *metacontext, uint64_t offset, uint32_t *entries, size_t nr_entries); > +typedef int (*extent_fn) (void *data, const char *metacontext, uint64_t offset, > + uint32_t *entries, size_t nr_entries); > + > +struct command_cb { > + void *opaque; > + union { > + extent_fn extent; > + /* More to come */ > + } fn; > +}; > > struct command_in_flight { > struct command_in_flight *next; > @@ -240,8 +249,8 @@ struct command_in_flight { > uint64_t handle; > uint64_t offset; > uint32_t count; > - void *data; /* Buffer for read/write, opaque for block status */ > - extent_fn extent_fn; > + void *data; /* Buffer for read/write */ > + struct command_cb cb; > bool data_seen; /* For read, true if at least one data chunk seen */ > uint32_t error; /* Local errno value */ > }; > @@ -300,7 +309,7 @@ extern const char *nbd_internal_name_of_nbd_cmd (uint16_t type); > extern int64_t nbd_internal_command_common (struct nbd_handle *h, > uint16_t flags, uint16_t type, > uint64_t offset, uint64_t count, > - void *data, extent_fn extent); > + void *data, struct command_cb *cb); > > /* socket.c */ > struct socket *nbd_internal_socket_create (int fd); > diff --git a/lib/rw.c b/lib/rw.c > index ad9c8a0..dc81c57 100644 > --- a/lib/rw.c > +++ b/lib/rw.c > @@ -143,7 +143,7 @@ int64_t > nbd_internal_command_common (struct nbd_handle *h, > uint16_t flags, uint16_t type, > uint64_t offset, uint64_t count, void *data, > - extent_fn extent) > + struct command_cb *cb) > { > struct command_in_flight *cmd, *prev_cmd; > > @@ -183,7 +183,8 @@ nbd_internal_command_common (struct nbd_handle *h, > cmd->offset = offset; > cmd->count = count; > cmd->data = data; > - cmd->extent_fn = extent; > + if (cb) > + cmd->cb = *cb; > > /* If structured replies were negotiated then we trust the server to > * send back sufficient data to cover the whole buffer. It's tricky > @@ -360,6 +361,8 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h, > void *data, extent_fn extent, > uint32_t flags) > { > + struct command_cb cb = { .opaque = data, .fn.extent = extent, }; > + > if (!h->structured_replies) { > set_error (ENOTSUP, "server does not support structured replies"); > return -1; > @@ -383,5 +386,5 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h, > } > > return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset, > - count, data, extent); > + count, NULL, &cb); > } > -- > 2.20.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://people.redhat.com/~rjones/virt-top
Richard W.M. Jones
2019-Jun-20  08:34 UTC
Re: [Libguestfs] [libnbd PATCH 5/8] states: Wire in a read callback
On Mon, Jun 17, 2019 at 07:07:55PM -0500, Eric Blake wrote:> + errno = nbd_internal_errno_of_nbd_error (error); > + set_error (errno, "server reported read failure at offset 0x%" PRIx64, > + offset); > + if (cmd->cb.fn.read (cmd->cb.opaque, cmd->data + (offset - cmd->offset), > + 0, offset, LIBNBD_READ_ERROR) == -1)I understand that in the next commit the pread_callback callback function is meant to read errno implicitly, and this is why you are setting it before calling that callback here. However I don't understand why you didn't just pass this in as an extra (ie. explicit) parameter? As currently defined this will not work well in non-C languages, because the generator and indeed those languages know nothing about errno. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Richard W.M. Jones
2019-Jun-20  08:43 UTC
Re: [Libguestfs] [libnbd PATCH 6/8] states: Add nbd_pread_callback API
On Mon, Jun 17, 2019 at 07:07:56PM -0500, Eric Blake wrote:> diff --git a/generator/generator b/generator/generator > index 2614689..ce77f17 100755 > --- a/generator/generator > +++ b/generator/generator > @@ -1305,7 +1305,72 @@ Issue a read command to the NBD server for the range starting > at C<offset> and ending at C<offset> + C<count> - 1. NBD > can only read all or nothing using this call. The call > returns when the data has been read fully into C<buf> or there is an > -error. > +error. See also C<nbd_pread_callback>, if finer visibility is > +required into the server's replies. > + > +The C<flags> parameter must be C<0> for now (it exists for future NBD > +protocol extensions)."; > + }; > + > + "pread_callback", { > + default_call with > + args = [ BytesOut ("buf", "count"); UInt64 "offset"; > + Opaque "data"; > + Callback ("chunk", [Opaque "data"; > + BytesIn ("buf", "count"); UInt64 "offset"; > + Int "status";]); > + Flags "flags" ]; > + ret = RErr; > + permitted_states = [ Connected ]; > + shortdesc = "read from the NBD server"; > + longdesc = "\As I mentioned re the previous commit, I think the implicit errno is problematic, and an explicit int parameter would be preferable. I think I also have bikeshedding concerns about the name of this function. Do you have any better suggestions, such as 'pread_structured', 'structured_pread', 'pread_discontinuous', ...? Given that, the overall idea looks sound to me.> +of C<buf> within the original buffer). The C<status> parameter is > +one of > + > +=over 4 > + > +=item C<LIBNBD_READ_DATA> = 1 > + > +C<buf> was populated with C<count> bytes of data. > + > +=item C<LIBNBD_READ_HOLE> = 2 > + > +C<buf> represents a hole, and contains C<count> NUL bytes. > + > +=item C<LIBNBD_READ_ERR> = 3 > + > +C<count> is 0, and C<buf> represents the offset of where an error > +occurred. The error is visible in C<errno> or by calling > +C<nbd_get_errno>. > + > +=backI guess we anticipate that the status field might contain other values in future? I think we should say so either way so that callees can be aware of what they need to do if passed an unknown value. The changes which add Int support for callbacks looks like they ought to go into a separate patch (will at least make them easier to review):> @@ -3264,7 +3349,8 @@ let print_python_binding name { args; ret } > pr " PyObject *py_%s = PyList_New (%s);\n" n len; > pr " for (size_t i = 0; i < %s; ++i)\n" len; > pr " PyList_SET_ITEM (py_%s, i, PyLong_FromUnsignedLong (%s[i]));\n" n n > - | BytesIn _ -> () > + | BytesIn _ > + | Int _ -> () > | Opaque n -> > pr " struct %s_%s_data *_data = %s;\n" name cb_name n > | String n > @@ -3272,7 +3358,7 @@ let print_python_binding name { args; ret } > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist _ > - | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > + | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > | UInt _ | UInt32 _ -> assert false > ) args; > pr "\n"; > @@ -3282,13 +3368,14 @@ let print_python_binding name { args; ret } > function > | ArrayAndLen (UInt32 n, len) -> pr " \"O\"" > | BytesIn (n, len) -> pr " \"y#\"" > + | Int n -> pr " \"i\"" > | Opaque n -> pr " \"O\"" > | String n -> pr " \"s\"" > | UInt64 n -> pr " \"K\"" > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist _ > - | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > + | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > | UInt _ | UInt32 _ -> assert false > ) args; > pr " \")\""; > @@ -3297,12 +3384,13 @@ let print_python_binding name { args; ret } > | ArrayAndLen (UInt32 n, _) -> pr ", py_%s" n > | BytesIn (n, len) -> pr ", %s, (int) %s" n len > | Opaque _ -> pr ", _data->data" > + | Int n > | String n > | UInt64 n -> pr ", %s" n > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist _ > - | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > + | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > | UInt _ | UInt32 _ -> assert false > ) args; > pr ");\n"; > @@ -3332,13 +3420,14 @@ let print_python_binding name { args; ret } > | ArrayAndLen (UInt32 n, _) -> > pr " Py_DECREF (py_%s);\n" n > | BytesIn _ > + | Int _ > + | Opaque _ > | String _ > - | UInt64 _ > - | Opaque _ -> () > + | UInt64 _ -> () > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > | BytesPersistIn _ | BytesPersistOut _ | Callback _ | CallbackPersist _ > - | Flags _ | Int _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > + | Flags _ | Int64 _ | Path _ | SockAddrAndLen _ | StringList _ > | UInt _ | UInt32 _ -> assert false > ) args; > pr " return ret;\n"; > @@ -3989,13 +4078,13 @@ let print_ocaml_binding (name, { args; ret }) > List.map ( > function > | ArrayAndLen (UInt32 n, _) | BytesIn (n, _) > - | String n | UInt64 n | Opaque n -> > + | Int n | Opaque n | String n | UInt64 n -> > n ^ "v" > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > | BytesPersistIn _ | BytesPersistOut _ > | Callback _ | CallbackPersist _ > - | Flags _ | Int _ | Int64 _ | Path _ > + | Flags _ | Int64 _ | Path _ > | SockAddrAndLen _ | StringList _ > | UInt _ | UInt32 _ -> assert false > ) args in > @@ -4021,6 +4110,8 @@ let print_ocaml_binding (name, { args; ret }) > | BytesIn (n, len) -> > pr " %sv = caml_alloc_string (%s);\n" n len; > pr " memcpy (String_val (%sv), %s, %s);\n" n n len > + | Int n -> > + pr " %sv = caml_copy_int32 (%s);\n" n nThis is wrong, it should be: %sv = Val_int (%s); Foo_bar means convert a "bar" to a "foo". In this case you need to convert a C int to an OCaml value. This macro turns into a single bit shift, see my series of articles here: https://rwmj.wordpress.com/2009/08/04/ocaml-internals/ Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://people.redhat.com/~rjones/virt-df/
Richard W.M. Jones
2019-Jun-20  23:42 UTC
Re: [Libguestfs] [libnbd RFC PATCH 9/8] wip: generator: Add ReadStatus enum type
On Mon, Jun 17, 2019 at 08:39:43PM -0500, Eric Blake wrote:> Exploration of what it would take to treat the nbd_pread_callback > status parameter as a language-appropriate enum rather than a bare > integer. While I've identified a rough set of the places to change, I > have not actually got it working with Python or OCaml bindings. > --- > > Posting this more for an idea of whether we want to pursue it > further. I have no hard feelings if we decide to ditch it; what's > more, compilers don't necessarily guarantee ABI stability with enum > types in public headers, so sticking to Int may be better after all.I believe there are ABI problems with enums (something about how they change ABI if there are more than 256 cases?)> generator/generator | 46 +++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 40 insertions(+), 6 deletions(-) > > diff --git a/generator/generator b/generator/generator > index 630260b..d269a04 100755 > --- a/generator/generator > +++ b/generator/generator > @@ -845,6 +845,7 @@ and arg > | Int64 of string (* 64 bit signed int *) > | Opaque of string (* opaque object, void* in C *) > | Path of string (* filename or path *) > +| ReadStatus of string (* enum of pread_callback status *) > | SockAddrAndLen of string * string (* struct sockaddr * + socklen_t *) > | String of string (* string *) > | StringList of string (* argv-style NULL-terminated array of strings *) > @@ -2033,10 +2034,13 @@ let constants = [ > "CMD_FLAG_NO_HOLE", 1 lsl 1; > "CMD_FLAG_DF", 1 lsl 2; > "CMD_FLAG_REQ_ONE", 1 lsl 3; > - > - "READ_DATA", 1; > - "READ_HOLE", 2; > - "READ_ERROR", 3; > +] > +let enums = [ > + "nbd_read_status", [ > + "READ_DATA", 1; > + "READ_HOLE", 2; > + "READ_ERROR", 3; > + ]; > ]This is an assoc-list (https://en.wikipedia.org/wiki/Association_list) so you can look up entries using: List.assoc "nbd_read_status" enums which will return the sub-list if you need to do that. It's not very time efficient but for the tiny number of cases we expect it's not a problem.> (*----------------------------------------------------------------------*) > @@ -2812,6 +2816,7 @@ let rec name_of_arg = function > | Int64 n -> [n] > | Opaque n -> [n] > | Path n -> [n] > +| ReadStatus n -> [n] > | SockAddrAndLen (n, len) -> [n; len] > | String n -> [n] > | StringList n -> [n] > @@ -2845,6 +2850,7 @@ let rec print_c_arg_list ?(handle = false) args > | Int n -> pr "int %s" n > | Int64 n -> pr "int64_t %s" n > | Opaque n -> pr "void *%s" n > + | ReadStatus n -> pr "enum nbd_read_status %s" n > | Path n > | String n -> pr "const char *%s" n > | StringList n -> pr "char **%s" n > @@ -2896,6 +2902,13 @@ let generate_include_libnbd_h () > pr "\n"; > List.iter (fun (n, i) -> pr "#define LIBNBD_%-30s %d\n" n i) constants; > pr "\n"; > + List.iter ( > + fun (name, list) -> > + pr "enum %s {\n" name; > + List.iter (fun (n, i) -> pr " LIBNBD_%-30s = %d,\n" n i) list; > + pr "};\n" > + ) enums; > + pr "\n"; > pr "extern struct nbd_handle *nbd_create (void);\n"; > pr "#define LIBNBD_HAVE_NBD_CREATE 1\n"; > pr "\n"; > @@ -3367,6 +3380,7 @@ let print_python_binding name { args; ret } > | Int _ -> () > | Opaque n -> > pr " struct %s_%s_data *_data = %s;\n" name cb_name n > + | ReadStatus n > | String n > | UInt64 n -> () > (* The following not yet implemented for callbacks XXX *) > @@ -3384,6 +3398,7 @@ let print_python_binding name { args; ret } > | BytesIn (n, len) -> pr " \"y#\"" > | Int n -> pr " \"i\"" > | Opaque n -> pr " \"O\"" > + | ReadStatus n -> pr " XXX1" > | String n -> pr " \"s\"" > | UInt64 n -> pr " \"K\"" > (* The following not yet implemented for callbacks XXX *) > @@ -3398,6 +3413,7 @@ let print_python_binding name { args; ret } > | ArrayAndLen (UInt32 n, _) -> pr ", py_%s" n > | BytesIn (n, len) -> pr ", %s, (int) %s" n len > | Opaque _ -> pr ", _data->data" > + | ReadStatus n -> pr ", XXX2 %s" n > | Int n > | String n > | UInt64 n -> pr ", %s" n > @@ -3436,6 +3452,7 @@ let print_python_binding name { args; ret } > | BytesIn _ > | Int _ > | Opaque _ > + | ReadStatus _ > | String _ > | UInt64 _ -> () > (* The following not yet implemented for callbacks XXX *) > @@ -3497,6 +3514,7 @@ let print_python_binding name { args; ret } > pr " long long %s; /* really int64_t */\n" n > | Opaque _ -> () > | Path n -> pr " char *%s = NULL;\n" n > + | ReadStatus n -> pr " XXX3 %s\n" n > | SockAddrAndLen (n, _) -> > pr " /* XXX Complicated - Python uses a tuple of different\n"; > pr " * lengths for the different socket types.\n"; > @@ -3537,6 +3555,7 @@ let print_python_binding name { args; ret } > | Int64 _ > | Opaque _ > | Path _ > + | ReadStatus _ > | SockAddrAndLen _ > | String _ > | StringList _ > @@ -3561,6 +3580,7 @@ let print_python_binding name { args; ret } > | Int64 n -> pr " \"L\"" > | Opaque _ -> pr " \"O\"" > | Path n -> pr " \"O&\"" > + | ReadStatus n -> pr " XXX4" > | SockAddrAndLen (n, _) -> pr " \"O\"" > | String n -> pr " \"s\"" > | StringList n -> pr " \"O\"" > @@ -3584,6 +3604,7 @@ let print_python_binding name { args; ret } > | Int n -> pr ", &%s" n > | Int64 n -> pr ", &%s" n > | Opaque n -> pr ", &%s_data->data" (find_callback n) > + | ReadStatus n -> pr ", XXX5 &%s" n > | Path n -> pr ", PyUnicode_FSConverter, &%s" n > | SockAddrAndLen (n, _) -> pr ", &%s" n > | String n -> pr ", &%s" n > @@ -3634,6 +3655,7 @@ let print_python_binding name { args; ret } > | Int64 n -> pr " %s_i64 = %s;\n" n n > | Opaque n -> () > | Path _ -> () > + | ReadStatus n -> pr " XXX6 %s\n" n > | SockAddrAndLen _ -> > pr " abort (); /* XXX SockAddrAndLen not implemented */\n"; > | String _ -> () > @@ -3662,6 +3684,7 @@ let print_python_binding name { args; ret } > | Int64 n -> pr ", %s_i64" n > | Opaque n -> pr ", %s_data" (find_callback n) > | Path n -> pr ", %s" n > + | ReadStatus n -> pr ", XXX8 %s" n > | SockAddrAndLen (n, _) -> pr ", /* XXX */ (void *) %s, 0" n > | String n -> pr ", %s" n > | StringList n -> pr ", %s" n > @@ -3696,6 +3719,7 @@ let print_python_binding name { args; ret } > | Int64 _ > | Opaque _ > | Path _ > + | ReadStatus _ > | SockAddrAndLen _ > | String _ > | StringList _ > @@ -3741,6 +3765,7 @@ let print_python_binding name { args; ret } > | Int64 _ -> () > | Opaque _ -> () > | Path n -> pr " free (%s);\n" n > + | ReadStatus _ -> () > | SockAddrAndLen _ -> () > | String n -> () > | StringList n -> pr " nbd_internal_py_free_string_list (%s);\n" n > @@ -3795,6 +3820,7 @@ import libnbdmod > "; > > List.iter (fun (n, i) -> pr "%-30s = %d\n" n i) constants; > + (* how to represent enums? *) > > pr "\ > > @@ -3839,6 +3865,7 @@ class NBD (object): > | Int64 n -> n, None > | Opaque n -> n, None > | Path n -> n, None > + | ReadStatus n -> n, None > | SockAddrAndLen (n, _) -> n, None > | String n -> n, None > | StringList n -> n, None > @@ -3929,6 +3956,7 @@ and ocaml_arg_to_string = function > | OCamlArg (Int64 _) -> "int64" > | OCamlArg (Opaque n) -> sprintf "'%s" n > | OCamlArg (Path _) -> "string" > + | OCamlArg (ReadStatus _) -> "int" > | OCamlArg (SockAddrAndLen _) -> "string" (* XXX not impl *) > | OCamlArg (String _) -> "string" > | OCamlArg (StringList _) -> "string list" > @@ -3978,6 +4006,7 @@ exception Closed of string > List.iter ( > fun (n, _) -> pr "val %s : int32\n" (String.lowercase_ascii n) > ) constants; > + (* how to represent enums? *) > pr "\n"; > > pr "\ > @@ -4040,6 +4069,7 @@ let () > List.iter ( > fun (n, i) -> pr "let %s = %d_l\n" (String.lowercase_ascii n) i > ) constants; > + (* how to represent enums? *)Something like: type name_of_the_enum | CaseA | CaseB You have to at least make sure the first letter is a capital (or capitalize it) unfortunately.> pr "\n"; > > pr "\ > @@ -4092,7 +4122,7 @@ let print_ocaml_binding (name, { args; ret }) > List.map ( > function > | ArrayAndLen (UInt32 n, _) | BytesIn (n, _) > - | Int n | Opaque n | String n | UInt64 n -> > + | Int n | Opaque n | ReadStatus n | String n | UInt64 n -> > n ^ "v" > (* The following not yet implemented for callbacks XXX *) > | ArrayAndLen _ | Bool _ | BytesOut _ > @@ -4124,7 +4154,8 @@ let print_ocaml_binding (name, { args; ret }) > | BytesIn (n, len) -> > pr " %sv = caml_alloc_string (%s);\n" n len; > pr " memcpy (String_val (%sv), %s, %s);\n" n n len > - | Int n -> > + | Int n > + | ReadStatus n -> > pr " %sv = caml_copy_int32 (%s);\n" n nIn OCaml, a union type where there is no parameter (as in this case) is represented by an OCaml int, so you can write: %sv = Val_int (%s);> | String n -> > pr " %sv = caml_copy_string (%s);\n" n n > @@ -4282,6 +4313,8 @@ let print_ocaml_binding (name, { args; ret }) > ) > | OCamlArg (Path n) | OCamlArg (String n) -> > pr " const char *%s = String_val (%sv);\n" n n > + | OCamlArg (ReadStatus n) -> > + pr " unsigned %s = Int_val (%sv);\n" n nAnd I believe this is correct, although I guess you meant to use enum as the C type? Rich.> | OCamlArg (SockAddrAndLen (n, len)) -> > pr " const struct sockaddr *%s;\n" n; > pr " socklen_t %s;\n" len; > @@ -4363,6 +4396,7 @@ let print_ocaml_binding (name, { args; ret }) > | OCamlArg (Int64 _) > | OCamlArg (Opaque _) > | OCamlArg (Path _) > + | OCamlArg (ReadStatus _) > | OCamlArg (String _) > | OCamlArg (SockAddrAndLen _) > | OCamlArg (UInt _) > -- > 2.20.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs@redhat.com > https://www.redhat.com/mailman/listinfo/libguestfs-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://people.redhat.com/~rjones/virt-top
Reasonably Related Threads
- [libnbd PATCH 6/8] states: Add nbd_pread_callback API
- [libnbd PATCH v2 1/5] generator: Allow Int in callbacks
- [libnbd PATCH 3/6] generator: Allow Int64 in callbacks
- [libnbd PATCH 2/2] RFC: generator: Handle shared callbacks in Python
- [PATCH libnbd discussion only 4/5] api: Implement concurrent writer.