Eric Blake
2022-May-31 15:52 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
On Tue, May 31, 2022 at 10:49:03AM -0500, Eric Blake wrote:> This patch fixes the corner-case regression introduced by the previous > patch where the pread_structured callback buffer lifetime ends as soon > as the callback (that is, where accessing a stashed callback parameter > can result in ValueError instead of modifying a harmless copy). With > careful effort, we can create a memoryview of the Python object that > we read into, then slice that memoryview as part of the callback; now > the slice will be valid as long as the original object is also valid. >> | @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo > | PyObject *py_subbuf = NULL; > | PyObject *py_error = NULL; > | > | - /* Cast away const */ > | - py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); > | + if (data->buf) { > | + PyObject *start, *end, *slice; > | + assert (PyMemoryView_Check (data->buf)); > | + ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf; > | + start = PyLong_FromLong (offs); > | + if (!start) { PyErr_PrintEx (0); goto out; } > | + end = PyLong_FromLong (offs + count); > | + if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; } > | + slice = PySlice_New (start, end, NULL); > | + Py_DECREF (start); > | + Py_DECREF (end); > | + if (!slice) { PyErr_PrintEx (0); goto out; } > | + py_subbuf = PyObject_GetItem (data->buf, slice);Missing a Py_DECREF (slice) here. Easy enough to add...> +++ b/generator/Python.ml > @@ -187,8 +187,24 @@ let > pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n; > pr " }\n" > | CBBytesIn (n, len) -> > - pr " /* Cast away const */\n"; > - pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; > + pr " if (data->buf) {\n"; > + pr " PyObject *start, *end, *slice;\n"; > + pr " assert (PyMemoryView_Check (data->buf));\n"; > + pr " ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n; > + pr " start = PyLong_FromLong (offs);\n"; > + pr " if (!start) { PyErr_PrintEx (0); goto out; }\n"; > + pr " end = PyLong_FromLong (offs + %s);\n" len; > + pr " if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n"; > + pr " slice = PySlice_New (start, end, NULL);\n"; > + pr " Py_DECREF (start);\n"; > + pr " Py_DECREF (end);\n"; > + pr " if (!slice) { PyErr_PrintEx (0); goto out; }\n"; > + pr " py_%s = PyObject_GetItem (data->buf, slice);\n" n;...here And I really wish python didn't make it so hard to grab a slice of another object using C code. Having to create 3 temporary PyObjects instead of having a utility C function that takes normal integers was annoying. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
Nir Soffer
2022-May-31 23:20 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
On Tue, May 31, 2022 at 6:52 PM Eric Blake <eblake at redhat.com> wrote:> > On Tue, May 31, 2022 at 10:49:03AM -0500, Eric Blake wrote: > > This patch fixes the corner-case regression introduced by the previous > > patch where the pread_structured callback buffer lifetime ends as soon > > as the callback (that is, where accessing a stashed callback parameter > > can result in ValueError instead of modifying a harmless copy). With > > careful effort, we can create a memoryview of the Python object that > > we read into, then slice that memoryview as part of the callback; now > > the slice will be valid as long as the original object is also valid. > > > > > | @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo > > | PyObject *py_subbuf = NULL; > > | PyObject *py_error = NULL; > > | > > | - /* Cast away const */ > > | - py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); > > | + if (data->buf) {In which case we don't have data->buf?> > | + PyObject *start, *end, *slice; > > | + assert (PyMemoryView_Check (data->buf));Why do we need data->buf to be a memoryview? We can create a new memoryview form data->buf - it only needs to be an object supporting the buffer protocol. Basically we need the C version of: memoryview(buf)[start:stop] buf can be bytes, bytearray, mmap, or another memoryview.> > | + ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf;Because PyMemoryView_Check is inside the assert, build with NDEBUG will remove the check, and this call may crash if data->buf is not a memoryview. It would be nicer if we could get the offset without looking into the internal buffer of the memoryview.> > | + start = PyLong_FromLong (offs); > > | + if (!start) { PyErr_PrintEx (0); goto out; } > > | + end = PyLong_FromLong (offs + count); > > | + if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; } > > | + slice = PySlice_New (start, end, NULL); > > | + Py_DECREF (start); > > | + Py_DECREF (end); > > | + if (!slice) { PyErr_PrintEx (0); goto out; } > > | + py_subbuf = PyObject_GetItem (data->buf, slice); > > Missing a Py_DECREF (slice) here. Easy enough to add... > > > +++ b/generator/Python.ml > > @@ -187,8 +187,24 @@ let > > pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n; > > pr " }\n" > > | CBBytesIn (n, len) -> > > - pr " /* Cast away const */\n"; > > - pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; > > + pr " if (data->buf) {\n"; > > + pr " PyObject *start, *end, *slice;\n"; > > + pr " assert (PyMemoryView_Check (data->buf));\n"; > > + pr " ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n; > > + pr " start = PyLong_FromLong (offs);\n"; > > + pr " if (!start) { PyErr_PrintEx (0); goto out; }\n"; > > + pr " end = PyLong_FromLong (offs + %s);\n" len; > > + pr " if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n"; > > + pr " slice = PySlice_New (start, end, NULL);\n"; > > + pr " Py_DECREF (start);\n"; > > + pr " Py_DECREF (end);\n"; > > + pr " if (!slice) { PyErr_PrintEx (0); goto out; }\n"; > > + pr " py_%s = PyObject_GetItem (data->buf, slice);\n" n; > > ...here > > And I really wish python didn't make it so hard to grab a slice of > another object using C code. Having to create 3 temporary PyObjects > instead of having a utility C function that takes normal integers was > annoying.Does this work? PySlice_New(NULL, NULL, NULL); PySlice_AdjustIndices(length, start, stop, step); Nir
Eric Blake
2022-Jun-01 00:24 UTC
[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original
On Wed, Jun 01, 2022 at 02:20:48AM +0300, Nir Soffer wrote:> On Tue, May 31, 2022 at 6:52 PM Eric Blake <eblake at redhat.com> wrote: > > > > On Tue, May 31, 2022 at 10:49:03AM -0500, Eric Blake wrote: > > > This patch fixes the corner-case regression introduced by the previous > > > patch where the pread_structured callback buffer lifetime ends as soon > > > as the callback (that is, where accessing a stashed callback parameter > > > can result in ValueError instead of modifying a harmless copy). With > > > careful effort, we can create a memoryview of the Python object that > > > we read into, then slice that memoryview as part of the callback; now > > > the slice will be valid as long as the original object is also valid. > > > > > > > > | @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo > > > | PyObject *py_subbuf = NULL; > > > | PyObject *py_error = NULL; > > > | > > > | - /* Cast away const */ > > > | - py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ); > > > | + if (data->buf) { > > In which case we don't have data->buf?Right now, in nbd_internal_py_aio_read_structured. Fixing that will eventually become patch 4/2 for this series (the idea is that instead of requiring the user to pass in an nbd.Buffer object, we should take any buffer-like object, populate data->buf with zero-copy semantics, and we're good to go. But to avoid breaking back-compat, we either have to also special-case existing code using nbd.Buffer, or enhance the nbd.Buffer class to implement the buffer-like interface).> > > > | + PyObject *start, *end, *slice; > > > | + assert (PyMemoryView_Check (data->buf)); > > Why do we need data->buf to be a memoryview?Maybe it doesn't. It looks like (at least from python, rather than in the C coding side of things) that you can apply the slice operation to bytes and bytearray. But there may be other buffer-like objects that don't directly support slice while memoryview always does; and one of the reasons memoryview is part of the standard python library is to make it easy to add operations on top of any buffer-like object. memoryview also takes care of doing a temporary copy to consolidate out a contiguous view if the original buffer is not contiguous; you don't need that with bytes or bytearray, but definitely need it with array.array and friends.> > We can create a new memoryview form data->buf - it only needs to be an object > supporting the buffer protocol. Basically we need the C version of: > > memoryview(buf)[start:stop] > > buf can be bytes, bytearray, mmap, or another memoryview.Yes - and that's what this C code is. memoryview(buf) was created when populating data->buf (whether the original was bytes, bytearray, mmap, or other buffer-like object), then this code follows up with creating the slice [start:stop] then pulling it altogether with view.__getitem__(slice).> > > > | + ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf; > > Because PyMemoryView_Check is inside the assert, build with NDEBUG will > remove the check, and this call may crash if data->buf is not a memoryview.It's more a proof that the earlier code in nbd_internal_py_pread_structured correctly set data->buf. I'm not worried about an NDEBUG build failing; this is one case where an assert() really is more for documentation.> > It would be nicer if we could get the offset without looking into the internal > buffer of the memoryview.We could pass the void* alongside the PyObject* in struct user_data. But ultimately, we HAVE to look at the internal pointer of the memoryview, in order to call nbd_pread_structured in the first place. Whether we look once (and populate two fields in data) or every time the callback is reached is less important. It would also be possible to store the original offset to nbd_pread_structured in user_data, then compute the slice start as the callback's offset - original offset, without having to do pointer math. But we still have to store the memoryview PyObject that we will be slicing. So it just becomes a question of whether struct user_data needs to grow.> > > > | + start = PyLong_FromLong (offs); > > > | + if (!start) { PyErr_PrintEx (0); goto out; } > > > | + end = PyLong_FromLong (offs + count); > > > | + if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; } > > > | + slice = PySlice_New (start, end, NULL); > > > | + Py_DECREF (start); > > > | + Py_DECREF (end); > > > | + if (!slice) { PyErr_PrintEx (0); goto out; } > > > | + py_subbuf = PyObject_GetItem (data->buf, slice); > > > > Missing a Py_DECREF (slice) here. Easy enough to add... > > > > > +++ b/generator/Python.ml > > > @@ -187,8 +187,24 @@ let > > > pr " PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n; > > > pr " }\n" > > > | CBBytesIn (n, len) -> > > > - pr " /* Cast away const */\n"; > > > - pr " py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len; > > > + pr " if (data->buf) {\n"; > > > + pr " PyObject *start, *end, *slice;\n"; > > > + pr " assert (PyMemoryView_Check (data->buf));\n"; > > > + pr " ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n; > > > + pr " start = PyLong_FromLong (offs);\n"; > > > + pr " if (!start) { PyErr_PrintEx (0); goto out; }\n"; > > > + pr " end = PyLong_FromLong (offs + %s);\n" len; > > > + pr " if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n"; > > > + pr " slice = PySlice_New (start, end, NULL);\n"; > > > + pr " Py_DECREF (start);\n"; > > > + pr " Py_DECREF (end);\n"; > > > + pr " if (!slice) { PyErr_PrintEx (0); goto out; }\n"; > > > + pr " py_%s = PyObject_GetItem (data->buf, slice);\n" n; > > > > ...here > > > > And I really wish python didn't make it so hard to grab a slice of > > another object using C code. Having to create 3 temporary PyObjects > > instead of having a utility C function that takes normal integers was > > annoying. > > Does this work? > > PySlice_New(NULL, NULL, NULL); > PySlice_AdjustIndices(length, start, stop, step);New in python 3.6.1. README says we still target python 3.3. Worse, look at the signature - it does NOT take a PyObject *, and therefore it cannot modify an existing slice object (rather, it is for easing the computation of how to turn [-1:-2:2] into a slice with positive bounds) - you still have to go through a step that converts integers into PyObject* before creating a PySlice object. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org