Richard W.M. Jones
2020-Aug-05 16:40 UTC
[Libguestfs] [PATCH nbdkit 3/4] python: Allow thread model to be set from Python plugins.
This is working for me now, although possibly only on Python 3.9. Dan suggested PyEval_InitThreads but that was deprecated in Python 3.7. Rich.
Richard W.M. Jones
2020-Aug-05 16:40 UTC
[Libguestfs] [PATCH nbdkit 1/4] python: Make last_error into a thread-local variable.
While with the current thread model this makes no difference, it is required if we want to allow more parallel thread models. --- plugins/python/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/python/python.c b/plugins/python/python.c index f21a1602..9fa89964 100644 --- a/plugins/python/python.c +++ b/plugins/python/python.c @@ -63,7 +63,7 @@ static const char *script; static PyObject *module; static int py_api_version = 1; -static int last_error; +static __thread int last_error; /* Is a callback defined? */ static int -- 2.27.0
Richard W.M. Jones
2020-Aug-05 16:40 UTC
[Libguestfs] [PATCH nbdkit 2/4] python: Acquire and release the Python GIL.
With the current thread model this doesn't matter since nbdkit itself essentially enforces something like the GIL. But if we want to parallelize the Python plugin we must actually start using the GIL right. This is both complex and very poorly documented (including a lot of plainly incorrect information online). However I believe what I have written here is right, and it appears to work. --- plugins/python/python.c | 45 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/plugins/python/python.c b/plugins/python/python.c index 9fa89964..229a46f1 100644 --- a/plugins/python/python.c +++ b/plugins/python/python.c @@ -51,6 +51,19 @@ #include "cleanup.h" +/* All callbacks that want to call any Py* function should use this + * macro. See + * https://docs.python.org/3/c-api/init.html#non-python-created-threads + */ +#define ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE \ + __attribute__((cleanup (cleanup_release))) \ + PyGILState_STATE gstate = PyGILState_Ensure() +static inline void +cleanup_release (PyGILState_STATE *gstateptr) +{ + PyGILState_Release (*gstateptr); +} + /* XXX Apparently global state is technically wrong in Python 3, see: * * https://www.python.org/dev/peps/pep-3121/ @@ -304,23 +317,30 @@ create_nbdkit_module (void) return m; } +static PyThreadState *tstate; + static void py_load (void) { PyImport_AppendInittab ("nbdkit", create_nbdkit_module); Py_Initialize (); + tstate = PyEval_SaveThread (); } static void py_unload (void) { - Py_XDECREF (module); - Py_Finalize (); + if (tstate) { + PyEval_RestoreThread (tstate); + Py_XDECREF (module); + Py_Finalize (); + } } static void py_dump_plugin (void) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; PyObject *fn; PyObject *r; @@ -368,6 +388,7 @@ get_py_api_version (void) static int py_config (const char *key, const char *value) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; int fd; FILE *fp; PyObject *modname; @@ -456,6 +477,7 @@ py_config (const char *key, const char *value) static int py_config_complete (void) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; PyObject *fn; PyObject *r; @@ -475,6 +497,7 @@ py_config_complete (void) static int py_get_ready (void) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; PyObject *fn; PyObject *r; @@ -499,6 +522,7 @@ struct handle { static void * py_open (int readonly) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; PyObject *fn; struct handle *h; @@ -531,6 +555,7 @@ py_open (int readonly) static void py_close (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -551,6 +576,7 @@ py_close (void *handle) static int64_t py_get_size (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -580,6 +606,7 @@ static int py_pread (void *handle, void *buf, uint32_t count, uint64_t offset, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -642,6 +669,7 @@ static int py_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -678,6 +706,7 @@ py_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, static int py_flush (void *handle, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -710,6 +739,7 @@ py_flush (void *handle, uint32_t flags) static int py_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -742,6 +772,7 @@ py_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags) static int py_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -788,6 +819,7 @@ py_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) static int py_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -847,36 +879,42 @@ boolean_callback (void *handle, const char *can_fn, const char *plain_fn) static int py_is_rotational (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; return boolean_callback (handle, "is_rotational", NULL); } static int py_can_multi_conn (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; return boolean_callback (handle, "can_multi_conn", NULL); } static int py_can_write (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; return boolean_callback (handle, "can_write", "pwrite"); } static int py_can_flush (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; return boolean_callback (handle, "can_flush", "flush"); } static int py_can_trim (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; return boolean_callback (handle, "can_trim", "trim"); } static int py_can_zero (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; if (h->can_zero >= 0) @@ -887,6 +925,7 @@ py_can_zero (void *handle) static int py_can_fast_zero (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; int r; if (callback_defined ("can_fast_zero", NULL)) @@ -904,6 +943,7 @@ py_can_fast_zero (void *handle) static int py_can_fua (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; @@ -932,6 +972,7 @@ py_can_fua (void *handle) static int py_can_cache (void *handle) { + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; struct handle *h = handle; PyObject *fn; PyObject *r; -- 2.27.0
Richard W.M. Jones
2020-Aug-05 16:40 UTC
[Libguestfs] [PATCH nbdkit 3/4] python: Allow thread model to be set from Python code.
Thanks: Nir Soffer. --- plugins/python/nbdkit-python-plugin.pod | 26 +++++++++++++++------- plugins/python/python.c | 29 ++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/plugins/python/nbdkit-python-plugin.pod b/plugins/python/nbdkit-python-plugin.pod index 852366f0..edd56d13 100644 --- a/plugins/python/nbdkit-python-plugin.pod +++ b/plugins/python/nbdkit-python-plugin.pod @@ -163,6 +163,15 @@ There are no arguments or return value. There are no arguments or return value. +=item C<thread_model> + +(Optional, nbdkit E<ge> 1.22) + + def thread_model(): + return nbdkit.THEAD_MODEL_SERIALIZE_ALL_REQUESTS + +See L</Threads> below. + =item C<get_ready> (Optional) @@ -367,10 +376,6 @@ optionally using C<nbdkit.set_error> first. These are not needed because you can just use ordinary Python constructs. -=item Missing: C<thread_model> - -See L</Threads> below. - =item Missing: C<name>, C<version>, @@ -387,10 +392,15 @@ These are not yet supported. =head2 Threads -The thread model for Python callbacks currently cannot be set from -Python. It is hard-coded in the C part to -C<NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS>. This may change or be -settable in future. +The thread model for Python callbacks defaults to +C<NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS>. Since S<nbdkit 1.22> +it is possible to set this by implementing a C<thread_model> function +which returns one of the constants C<nbdkit.THREAD_MODEL_*>. + +The Python Global Interpreter Lock (GIL) usually means that you cannot +execute Python code in parallel, but Python code which calls into +libraries which block (eg. to make HTTP requests) might be executed in +parallel. =head1 FILES diff --git a/plugins/python/python.c b/plugins/python/python.c index 229a46f1..398473f5 100644 --- a/plugins/python/python.c +++ b/plugins/python/python.c @@ -494,6 +494,28 @@ py_config_complete (void) return 0; } +static int +py_thread_model (void) +{ + ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE; + PyObject *fn; + PyObject *r; + int ret = NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS; + + if (script && callback_defined ("thread_model", &fn)) { + PyErr_Clear (); + + r = PyObject_CallObject (fn, NULL); + Py_DECREF (fn); + if (check_python_failure ("thread_model") == -1) + return -1; + ret = PyLong_AsLong (r); + Py_DECREF (r); + } + + return ret; +} + static int py_get_ready (void) { @@ -1002,7 +1024,11 @@ py_can_cache (void *handle) "script=<FILENAME> (required) The Python plugin to run.\n" \ "[other arguments may be used by the plugin that you load]" -#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS +/* This is the maximum possible, but the default for plugins is + * NBDKIT_THREAD_MODEL_SERIALIZE_ALL_REQUESTS. Plugins can override + * that by providing a thread_model() function. + */ +#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL static struct nbdkit_plugin plugin = { .name = "python", @@ -1016,6 +1042,7 @@ static struct nbdkit_plugin plugin = { .config_complete = py_config_complete, .config_help = py_config_help, + .thread_model = py_thread_model, .get_ready = py_get_ready, .open = py_open, -- 2.27.0
Richard W.M. Jones
2020-Aug-05 16:40 UTC
[Libguestfs] [PATCH nbdkit 4/4] python: Test the parallel thread model.
--- tests/Makefile.am | 3 + tests/test-python-thread-model.sh | 91 +++++++++++++++++++++++++++++++ tests/python-thread-model.py | 50 +++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/tests/Makefile.am b/tests/Makefile.am index 79be5639..2aa9e14c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1020,15 +1020,18 @@ TESTS += \ test-python.sh \ test-python-exception.sh \ test-python-export-name.sh \ + test-python-thread-model.sh \ test-shebang-python.sh \ $(NULL) EXTRA_DIST += \ python-exception.py \ python-export-name.py \ + python-thread-model.py \ shebang.py \ test-python-exception.sh \ test-python-export-name.sh \ test-python-plugin.py \ + test-python-thread-model.sh \ test-python.sh \ test-shebang-python.sh \ test_python.py \ diff --git a/tests/test-python-thread-model.sh b/tests/test-python-thread-model.sh new file mode 100755 index 00000000..e16ff154 --- /dev/null +++ b/tests/test-python-thread-model.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# nbdkit +# Copyright (C) 2018-2020 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +source ./functions.sh +set -e +set -x + +SCRIPT="$SRCDIR/python-thread-model.py" +if ! test -d "$SRCDIR" || ! test -f "$SCRIPT"; then + echo "$0: could not locate python-thread-model.py" + exit 1 +fi + +# Python has proven very difficult to valgrind, therefore it is disabled. +if [ "$NBDKIT_VALGRIND" = "1" ]; then + echo "$0: skipping Python test under valgrind." + exit 77 +fi + +requires nbdsh --version + +out=test-python-thread-model.out +pid=test-python-thread-model.pid +sock=`mktemp -u` +files="$out $pid $sock" +rm -f $files +cleanup_fn rm -f $files + +# Check the plugin is loadable and the effective thread model is parallel. +nbdkit python $SCRIPT --dump-plugin >$out +grep "^thread_model=parallel" $out + +start_nbdkit -P $pid -U $sock python $SCRIPT + +export sock +nbdsh -c ' +import os +import time + +h.connect_unix (os.environ["sock"]) + +# We should be able to issue multiple requests in parallel, +# and the total time taken should not be much more than 10 seconds +# because all sleeps in the plugin should happen in parallel. +start_t = time.time() +for i in range (10): + buf = nbd.Buffer (512) + h.aio_pread (buf, 0) + +while h.aio_in_flight() > 0: + h.poll(-1) +end_t = time.time() + +t = end_t - start_t +print (t) + +# Since we launched 10 requests, if we serialized on them we +# would have waited at least 100 seconds. We would expect to +# wait around 10 seconds, but for flexibility on slow servers +# any test < 100 should be fine. +assert t <= 50 +' diff --git a/tests/python-thread-model.py b/tests/python-thread-model.py new file mode 100644 index 00000000..67879e04 --- /dev/null +++ b/tests/python-thread-model.py @@ -0,0 +1,50 @@ +# nbdkit +# Copyright (C) 2018-2020 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +# Python plugin which uses the parallel thread model. Note that the +# cpython implementation of time.sleep releases the GIL (see +# Modules/timemodule.c:pysleep) + +import nbdkit +import time + +def thread_model(): + return nbdkit.THREAD_MODEL_PARALLEL + +def open(readonly): + return {} + +def get_size(h): + return 512 + +def pread(h, count, offset): + time.sleep(10) + return bytearray(count) -- 2.27.0
Richard W.M. Jones
2020-Aug-06 10:16 UTC
Re: [Libguestfs] [PATCH nbdkit 4/4] python: Test the parallel thread model.
I did a bunch more testing on this series including on older versions of Python 3 (back to 3.6 released Dec 2016) and I couldn't break it. Also added a couple of new examples which use the parallelism to a lesser or greater extent: An equivalent of the "file" plugin which probably isn't able to exploit parallelism, and an equivalent of the "curl" plugin (using urllib) which is. Both can do complex things including booting whole guests without crashing. So I'm going to push this. Nir: See if you can break this one please! https://github.com/libguestfs/nbdkit/blob/master/plugins/python/examples/url.py Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Possibly Parallel Threads
- [nbdkit PATCH 0/2] More language bindings for .list_exports
- [PATCH NOT WORKING nbdkit 0/3] python: Allow thread model to be set from Python plugins.
- [nbdkit PATCH 0/2] More caching of initial setup
- [PATCH nbdkit 0/5] server: Add .get_ready callback.
- [PATCH nbdkit 0/8] Implement nbdkit API v2 for Python plugins.