Eric Blake
2022-May-31 15:49 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
This patch fixes the corner-case regression introduced by the previous patch where the pread_structured callback buffer lifetime ends as soon as the callback (that is, where accessing a stashed callback parameter can result in ValueError instead of modifying a harmless copy). With careful effort, we can create a memoryview of the Python object that we read into, then slice that memoryview as part of the callback; now the slice will be valid as long as the original object is also valid. Note that this does not tackle the case of aio_pread_structured; for that, we either need to make nbd.Buffer honor the Python buffer protocol, or else change our handling of BytesPersistOut to utilize the buffer protocol directly instead of requiring the user to round-trip through nbd.Buffer. The runtime of this patch is still about the same 3.0s as in the test of the previous patch, but now you can do: $ nbdsh nbd> h.connect_command(["nbdkit","-s","memory","10"]) nbd> copy=None nbd> def f(b,o,s,e): ... global copy ... copy = b ... print(b[0]) ... nbd> print(copy) None nbd> buf = h.pread_structured(10,0,f) 0 nbd> copy <memory at 0x7fecef4e07c0> nbd> copy[0]=0x31 nbd> print(buf) bytearray(b'1\x00\x00\x00\x00\x00\x00\x00\x00\x00') That is, because we passed in a memoryview slice of the original buffer instead of a temporary slice of C memory, we can now stash that slice into a global, modify it, and see those modifications reflected back to the overall bytearray object returned by pread_structured. The generated diff is a bit more verbose, including some no-op lines added into pread, aio_pread, and aio_pread_structured. In practice, only pread_structured is actually impacted. | --- python/methods.c.bak 2022-05-31 09:51:30.406757934 -0500 | +++ python/methods.c 2022-05-31 10:26:01.797636894 -0500 | @@ -27,6 +27,7 @@ | #include <stdlib.h> | #include <stdint.h> | #include <stdbool.h> | +#include <stddef.h> | | #include <libnbd.h> | | @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo | PyObject *py_subbuf = NULL; | PyObject *py_error = NULL; | | - /* Cast away const */ | - py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); | + if (data->buf) { | + PyObject *start, *end, *slice; | + assert (PyMemoryView_Check (data->buf)); | + ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf; | + start = PyLong_FromLong (offs); | + if (!start) { PyErr_PrintEx (0); goto out; } | + end = PyLong_FromLong (offs + count); | + if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; } | + slice = PySlice_New (start, end, NULL); | + Py_DECREF (start); | + Py_DECREF (end); | + if (!slice) { PyErr_PrintEx (0); goto out; } | + py_subbuf = PyObject_GetItem (data->buf, slice); | + } | + else { | + /* Cast away const */ | + py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); | + } | if (!py_subbuf) { PyErr_PrintEx (0); goto out; } | PyObject *py_error_modname = PyUnicode_FromString ("ctypes"); | if (!py_error_modname) { PyErr_PrintEx (0); goto out; } | @@ -115,11 +132,11 @@ chunk_wrapper (void *user_data, const vo | }; | | out: | - if (py_subbuf) { | + if (py_subbuf && !data->buf) { | PyObject *tmp = PyObject_CallMethod(py_subbuf, "release", NULL); | Py_XDECREF (tmp); | - Py_DECREF (py_subbuf); | } | + Py_XDECREF (py_subbuf); | if (py_error) { | PyObject *py_error_ret = PyObject_GetAttrString (py_error, "value"); | *error = PyLong_AsLong (py_error_ret); | @@ -2304,7 +2321,7 @@ nbd_internal_py_pread (PyObject *self, P | struct nbd_handle *h; | int ret; | PyObject *py_ret = NULL; | - PyObject *buf = NULL; | + PyObject *py_buf = NULL; | Py_ssize_t count; | uint64_t offset_u64; | unsigned long long offset; /* really uint64_t */ | @@ -2318,20 +2335,20 @@ nbd_internal_py_pread (PyObject *self, P | h = get_handle (py_h); | if (!h) goto out; | flags_u32 = flags; | - buf = PyByteArray_FromStringAndSize (NULL, count); | - if (buf == NULL) goto out; | + py_buf = PyByteArray_FromStringAndSize (NULL, count); | + if (py_buf == NULL) goto out; | offset_u64 = offset; | | - ret = nbd_pread (h, PyByteArray_AS_STRING (buf), count, offset_u64, flags_u32); | + ret = nbd_pread (h, PyByteArray_AS_STRING (py_buf), count, offset_u64, flags_u32); | if (ret == -1) { | raise_exception (); | goto out; | } | - py_ret = buf; | - buf = NULL; | + py_ret = py_buf; | + py_buf = NULL; | | out: | - Py_XDECREF (buf); | + Py_XDECREF (py_buf); | return py_ret; | } | | @@ -2342,7 +2359,7 @@ nbd_internal_py_pread_structured (PyObje | struct nbd_handle *h; | int ret; | PyObject *py_ret = NULL; | - PyObject *buf = NULL; | + PyObject *py_buf = NULL; | Py_ssize_t count; | uint64_t offset_u64; | unsigned long long offset; /* really uint64_t */ | @@ -2360,8 +2377,8 @@ nbd_internal_py_pread_structured (PyObje | h = get_handle (py_h); | if (!h) goto out; | flags_u32 = flags; | - buf = PyByteArray_FromStringAndSize (NULL, count); | - if (buf == NULL) goto out; | + py_buf = PyByteArray_FromStringAndSize (NULL, count); | + if (py_buf == NULL) goto out; | offset_u64 = offset; | chunk.user_data = chunk_user_data = alloc_user_data (); | if (chunk_user_data == NULL) goto out; | @@ -2373,18 +2390,20 @@ nbd_internal_py_pread_structured (PyObje | /* Increment refcount since pointer may be saved by libnbd. */ | Py_INCREF (py_chunk_fn); | chunk_user_data->fn = py_chunk_fn; | + if (py_buf) | + chunk_user_data->buf = PyMemoryView_FromObject (py_buf); | | - ret = nbd_pread_structured (h, PyByteArray_AS_STRING (buf), count, offset_u64, chunk, flags_u32); | + ret = nbd_pread_structured (h, PyByteArray_AS_STRING (py_buf), count, offset_u64, chunk, flags_u32); | chunk_user_data = NULL; | if (ret == -1) { | raise_exception (); | goto out; | } | - py_ret = buf; | - buf = NULL; | + py_ret = py_buf; | + py_buf = NULL; | | out: | - Py_XDECREF (buf); | + Py_XDECREF (py_buf); | free_user_data (chunk_user_data); | return py_ret; | } | @@ -3171,6 +3190,7 @@ nbd_internal_py_aio_pread (PyObject *sel | struct nbd_handle *h; | int64_t ret; | PyObject *py_ret = NULL; | + PyObject *py_buf = NULL; | PyObject *buf; /* PyCapsule pointing to struct py_aio_buffer */ | struct py_aio_buffer *buf_buf; | uint64_t offset_u64; | @@ -3230,6 +3250,7 @@ nbd_internal_py_aio_pread_structured (Py | struct nbd_handle *h; | int64_t ret; | PyObject *py_ret = NULL; | + PyObject *py_buf = NULL; | PyObject *buf; /* PyCapsule pointing to struct py_aio_buffer */ | struct py_aio_buffer *buf_buf; | uint64_t offset_u64; | @@ -3282,6 +3303,8 @@ nbd_internal_py_aio_pread_structured (Py | /* Increment refcount since pointer may be saved by libnbd. */ | Py_INCREF (py_chunk_fn); | chunk_user_data->fn = py_chunk_fn; | + if (py_buf) | + chunk_user_data->buf = PyMemoryView_FromObject (py_buf); | | ret = nbd_aio_pread_structured (h, buf_buf->data, buf_buf->len, offset_u64, chunk, completion, flags_u32); | chunk_user_data = NULL; --- generator/Python.ml | 60 +++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/generator/Python.ml b/generator/Python.ml index 0191f79..4160759 100644 --- a/generator/Python.ml +++ b/generator/Python.ml @@ -187,8 +187,24 @@ let pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n; pr " }\n" | CBBytesIn (n, len) -> - pr " /* Cast away const */\n"; - pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; + pr " if (data->buf) {\n"; + pr " PyObject *start, *end, *slice;\n"; + pr " assert (PyMemoryView_Check (data->buf));\n"; + pr " ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n; + pr " start = PyLong_FromLong (offs);\n"; + pr " if (!start) { PyErr_PrintEx (0); goto out; }\n"; + pr " end = PyLong_FromLong (offs + %s);\n" len; + pr " if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n"; + pr " slice = PySlice_New (start, end, NULL);\n"; + pr " Py_DECREF (start);\n"; + pr " Py_DECREF (end);\n"; + pr " if (!slice) { PyErr_PrintEx (0); goto out; }\n"; + pr " py_%s = PyObject_GetItem (data->buf, slice);\n" n; + pr " }\n"; + pr " else {\n"; + pr " /* Cast away const */\n"; + pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; + pr " }\n"; pr " if (!py_%s) { PyErr_PrintEx (0); goto out; }\n" n | CBInt _ | CBInt64 _ -> () @@ -272,11 +288,11 @@ let pr " }\n" | CBBytesIn (n, _) -> (* https://stackoverflow.com/questions/58870010/how-to-call-release-on-a-memoryview-in-python-c-api *) - pr " if (py_%s) {\n" n; + pr " if (py_%s && !data->buf) {\n" n; pr " PyObject *tmp = PyObject_CallMethod(py_%s, \"release\", NULL);\n" n; pr " Py_XDECREF (tmp);\n"; - pr " Py_DECREF (py_%s);\n" n; - pr " }\n" + pr " }\n"; + pr " Py_XDECREF (py_%s);\n" n | CBInt _ | CBInt64 _ | CBString _ | CBUInt _ | CBUInt64 _ -> () @@ -288,6 +304,7 @@ let (* Generate the Python binding. *) let print_python_binding name { args; optargs; ret; may_set_error } + let store_mem = ref false in pr "PyObject *\n"; pr "nbd_internal_py_%s (PyObject *self, PyObject *args)\n" name; pr "{\n"; @@ -301,13 +318,19 @@ let | BytesIn (n, _) -> pr " Py_buffer %s = { .obj = NULL };\n" n | BytesOut (n, count) -> - pr " PyObject *%s = NULL;\n" n; - pr " Py_ssize_t %s;\n" count - | BytesPersistIn (n, _) - | BytesPersistOut (n, _) -> + pr " PyObject *py_%s = NULL;\n" n; + pr " Py_ssize_t %s;\n" count; + store_mem := true + | BytesPersistIn (n, _) -> pr " PyObject *%s; /* PyCapsule pointing to struct py_aio_buffer */\n" n; pr " struct py_aio_buffer *%s_buf;\n" n + | BytesPersistOut (n, _) -> + pr " PyObject *py_%s = NULL;\n" n; + pr " PyObject *%s; /* PyCapsule pointing to struct py_aio_buffer */\n" + n; + pr " struct py_aio_buffer *%s_buf;\n" n; + store_mem := true | Closure { cbname } -> pr " struct user_data *%s_user_data = NULL;\n" cbname; pr " PyObject *py_%s_fn;\n" cbname; @@ -440,8 +463,8 @@ let | Bool _ -> () | BytesIn _ -> () | BytesOut (n, count) -> - pr " %s = PyByteArray_FromStringAndSize (NULL, %s);\n" n count; - pr " if (%s == NULL) goto out;\n" n + pr " py_%s = PyByteArray_FromStringAndSize (NULL, %s);\n" n count; + pr " if (py_%s == NULL) goto out;\n" n | BytesPersistIn (n, _) | BytesPersistOut (n, _) -> pr " %s_buf = nbd_internal_py_get_aio_buffer (%s);\n" n n; pr " if (!%s_buf) goto out;\n" n; @@ -458,7 +481,11 @@ let pr " }\n"; pr " /* Increment refcount since pointer may be saved by libnbd. */\n"; pr " Py_INCREF (py_%s_fn);\n" cbname; - pr " %s_user_data->fn = py_%s_fn;\n" cbname cbname + pr " %s_user_data->fn = py_%s_fn;\n" cbname cbname; + if !store_mem then ( + pr " if (py_buf)\n"; + pr " %s_user_data->buf = PyMemoryView_FromObject (py_buf);\n" cbname + ) | Enum _ -> () | Flags (n, _) -> pr " %s_u32 = %s;\n" n n | Fd _ | Int _ -> () @@ -487,7 +514,7 @@ let function | Bool n -> pr ", %s" n | BytesIn (n, _) -> pr ", %s.buf, %s.len" n n - | BytesOut (n, count) -> pr ", PyByteArray_AS_STRING (%s), %s" n count + | BytesOut (n, count) -> pr ", PyByteArray_AS_STRING (py_%s), %s" n count | BytesPersistIn (n, _) | BytesPersistOut (n, _) -> pr ", %s_buf->data, %s_buf->len" n n | Closure { cbname } -> pr ", %s" cbname @@ -533,8 +560,8 @@ let List.iter ( function | BytesOut (n, _) -> - pr " py_ret = %s;\n" n; - pr " %s = NULL;\n" n; + pr " py_ret = py_%s;\n" n; + pr " py_%s = NULL;\n" n; use_ret := false | Bool _ | BytesIn _ @@ -581,7 +608,7 @@ let | BytesIn (n, _) -> pr " if (%s.obj)\n" n; pr " PyBuffer_Release (&%s);\n" n - | BytesOut (n, _) -> pr " Py_XDECREF (%s);\n" n + | BytesOut (n, _) -> pr " Py_XDECREF (py_%s);\n" n | BytesPersistIn _ | BytesPersistOut _ -> () | Closure { cbname } -> pr " free_user_data (%s_user_data);\n" cbname @@ -620,6 +647,7 @@ let pr "#include <stdlib.h>\n"; pr "#include <stdint.h>\n"; pr "#include <stdbool.h>\n"; + pr "#include <stddef.h>\n"; pr "\n"; pr "#include <libnbd.h>\n"; pr "\n"; -- 2.36.1
Eric Blake
2022-May-31 15:52 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
On Tue, May 31, 2022 at 10:49:03AM -0500, Eric Blake wrote:> This patch fixes the corner-case regression introduced by the previous > patch where the pread_structured callback buffer lifetime ends as soon > as the callback (that is, where accessing a stashed callback parameter > can result in ValueError instead of modifying a harmless copy). With > careful effort, we can create a memoryview of the Python object that > we read into, then slice that memoryview as part of the callback; now > the slice will be valid as long as the original object is also valid. >> | @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo > | PyObject *py_subbuf = NULL; > | PyObject *py_error = NULL; > | > | - /* Cast away const */ > | - py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); > | + if (data->buf) { > | + PyObject *start, *end, *slice; > | + assert (PyMemoryView_Check (data->buf)); > | + ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf; > | + start = PyLong_FromLong (offs); > | + if (!start) { PyErr_PrintEx (0); goto out; } > | + end = PyLong_FromLong (offs + count); > | + if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; } > | + slice = PySlice_New (start, end, NULL); > | + Py_DECREF (start); > | + Py_DECREF (end); > | + if (!slice) { PyErr_PrintEx (0); goto out; } > | + py_subbuf = PyObject_GetItem (data->buf, slice);Missing a Py_DECREF (slice) here. Easy enough to add...> +++ b/generator/Python.ml > @@ -187,8 +187,24 @@ let > pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n; > pr " }\n" > | CBBytesIn (n, len) -> > - pr " /* Cast away const */\n"; > - pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; > + pr " if (data->buf) {\n"; > + pr " PyObject *start, *end, *slice;\n"; > + pr " assert (PyMemoryView_Check (data->buf));\n"; > + pr " ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n; > + pr " start = PyLong_FromLong (offs);\n"; > + pr " if (!start) { PyErr_PrintEx (0); goto out; }\n"; > + pr " end = PyLong_FromLong (offs + %s);\n" len; > + pr " if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n"; > + pr " slice = PySlice_New (start, end, NULL);\n"; > + pr " Py_DECREF (start);\n"; > + pr " Py_DECREF (end);\n"; > + pr " if (!slice) { PyErr_PrintEx (0); goto out; }\n"; > + pr " py_%s = PyObject_GetItem (data->buf, slice);\n" n;...here And I really wish python didn't make it so hard to grab a slice of another object using C code. Having to create 3 temporary PyObjects instead of having a utility C function that takes normal integers was annoying. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Richard W.M. Jones
2022-Jun-01 08:47 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
On Tue, May 31, 2022 at 10:52:35AM -0500, Eric Blake wrote:> And I really wish python didn't make it so hard to grab a slice of > another object using C code. Having to create 3 temporary PyObjects > instead of having a utility C function that takes normal integers was > annoying.You can add hand-written utility functions to python/utils.c if that makes things easier. Note you will need to add a prototype for the new function(s) to the generated code in python/methods.h. See nbd_internal_py_get_string_list for an example of this. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org