Matteo Cafasso
2017-May-21 16:29 UTC
[Libguestfs] [PATCH 0/2] python: improved UTF8 decoding error handling
The Python 3 bindings are currently unable to deal with non-UTF-8 characters. This series continues what was proposed in RHBZ#1406906. A new function 'set_decode_error_handler' allows the user to specify how to deal with decoding errors. The default behaviour is to raise a UnicodeDecodeError. If the handler is changed to 'surrogateescape', non-UTF-8 characters will be escaped in a similar manner as for Python 2. See PEP 383 for reference. This series also fixes a bug introduced in commit 9d25b4e56471f9c33ea6229a8b620fc800c240f8. Matteo Cafasso (2): python: return bytes when return value is RBufferOut python: unicode decode handler error scheme setter generator/python.ml | 19 ++++++++++++++++++- python/handle.c | 28 ++++++++++++++++++++++++++-- python/t/test830RHBZ1406906.py | 6 ++++++ 3 files changed, 50 insertions(+), 3 deletions(-) -- 2.11.0
Matteo Cafasso
2017-May-21 16:29 UTC
[Libguestfs] [PATCH 1/2] python: return bytes when return value is RBufferOut
Signed-off-by: Matteo Cafasso <noxdafox@gmail.com> --- generator/python.ml | 3 ++- python/handle.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/generator/python.ml b/generator/python.ml index 4cae24757..f7c1f80bb 100644 --- a/generator/python.ml +++ b/generator/python.ml @@ -93,6 +93,7 @@ extern PyObject *guestfs_int_py_put_string_list (char * const * const argv); extern PyObject *guestfs_int_py_put_table (char * const * const argv); extern PyObject *guestfs_int_py_fromstring (const char *str); extern PyObject *guestfs_int_py_fromstringsize (const char *str, size_t size); +extern PyObject *guestfs_int_py_bytesfromstringsize (const char *str, size_t size); extern char *guestfs_int_py_asstring (PyObject *obj); "; @@ -513,7 +514,7 @@ and generate_python_actions actions () pr " guestfs_int_free_string_list (r);\n"; pr " if (py_r == NULL) goto out;\n"; | RBufferOut _ -> - pr " py_r = guestfs_int_py_fromstringsize (r, size);\n"; + pr " py_r = guestfs_int_py_bytesfromstringsize (r, size);\n"; pr " free (r);\n"; pr " if (py_r == NULL) goto out;\n"; ); diff --git a/python/handle.c b/python/handle.c index d93f2f021..52c36f1d2 100644 --- a/python/handle.c +++ b/python/handle.c @@ -400,6 +400,16 @@ guestfs_int_py_fromstringsize (const char *str, size_t size) #endif } +PyObject * +guestfs_int_py_bytesfromstringsize (const char *str, size_t size) +{ +#ifdef HAVE_PYSTRING_ASSTRING + return PyString_FromStringAndSize (str, size); +#else + return PyBytes_FromStringAndSize (str, size); +#endif +} + char * guestfs_int_py_asstring (PyObject *obj) { -- 2.11.0
Matteo Cafasso
2017-May-21 16:29 UTC
[Libguestfs] [PATCH 2/2] python: unicode decode handler error scheme setter
The set_decode_error_handler function allows the User to set the decoding error scheme to be used when non UTF8 characters are encountered in Python 3. The function has no effect in Python 2. Signed-off-by: Matteo Cafasso <noxdafox@gmail.com> --- generator/python.ml | 16 ++++++++++++++++ python/handle.c | 18 ++++++++++++++++-- python/t/test830RHBZ1406906.py | 6 ++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/generator/python.ml b/generator/python.ml index f7c1f80bb..66bb7f27d 100644 --- a/generator/python.ml +++ b/generator/python.ml @@ -82,6 +82,7 @@ put_handle (guestfs_h *g) } extern void guestfs_int_py_extend_module (PyObject *module); +extern PyObject *guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args); extern PyObject *guestfs_int_py_create (PyObject *self, PyObject *args); extern PyObject *guestfs_int_py_close (PyObject *self, PyObject *args); @@ -577,6 +578,8 @@ and generate_python_module () (* Table of functions. *) pr "static PyMethodDef methods[] = {\n"; + pr " { (char *) \"set_decode_error_handler\", \n"; + pr " guestfs_int_py_set_decode_error_handler, METH_VARARGS, NULL },\n"; pr " { (char *) \"create\", guestfs_int_py_create, METH_VARARGS, NULL },\n"; pr " { (char *) \"close\", guestfs_int_py_close, METH_VARARGS, NULL },\n"; pr " { (char *) \"set_event_callback\",\n"; @@ -728,6 +731,19 @@ class ClosedHandle(ValueError): pass +def set_decode_error_handler(handler): + \"\"\"Set the error handling scheme to use for the handling + of decoding errors. + The default is 'strict' meaning that decoding errors raise a + UnicodeDecodeError. + + The other possible value is 'surrogateescape', see PEP383 for reference. + + Return the previous error handler. 
+ \"\"\" + return libguestfsmod.set_decode_error_handler(handler) + + class GuestFS(object): \"\"\"Instances of this class are libguestfs API handles.\"\"\" diff --git a/python/handle.c b/python/handle.c index 52c36f1d2..b665bb899 100644 --- a/python/handle.c +++ b/python/handle.c @@ -35,6 +35,8 @@ #include "actions.h" +static const char *decode_error_handler = "strict"; + static PyObject **get_all_event_callbacks (guestfs_h *g, size_t *len_rtn); void @@ -45,6 +47,17 @@ guestfs_int_py_extend_module (PyObject *module) } PyObject * +guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args) +{ + const char *previous_handler = decode_error_handler; + + if (!PyArg_ParseTuple (args, (char *) "s:set_decode_error_handler", &decode_error_handler)) + return NULL; + + return guestfs_int_py_fromstring (previous_handler); +} + +PyObject * guestfs_int_py_create (PyObject *self, PyObject *args) { guestfs_h *g; @@ -386,7 +399,8 @@ guestfs_int_py_fromstring (const char *str) #ifdef HAVE_PYSTRING_ASSTRING return PyString_FromString (str); #else - return PyUnicode_FromString (str); + Py_ssize_t size = strlen(str); + return PyUnicode_DecodeUTF8 (str, size, decode_error_handler); #endif } @@ -396,7 +410,7 @@ guestfs_int_py_fromstringsize (const char *str, size_t size) #ifdef HAVE_PYSTRING_ASSTRING return PyString_FromStringAndSize (str, size); #else - return PyUnicode_FromStringAndSize (str, size); + return PyUnicode_DecodeUTF8 (str, size, decode_error_handler); #endif } diff --git a/python/t/test830RHBZ1406906.py b/python/t/test830RHBZ1406906.py index 17b875226..0bb1ac1d0 100644 --- a/python/t/test830RHBZ1406906.py +++ b/python/t/test830RHBZ1406906.py @@ -55,3 +55,9 @@ class Test830RHBZ1406906(unittest.TestCase): elif sys.version_info >= (2, 0): self.assertTrue( any(path for path in g.find("/") if non_utf8_fname in path)) + + # change decoding error handler + self.assertEqual( + guestfs.set_decode_error_handler("surrogateescape"), 'strict') + self.assertTrue( + any(path 
for path in g.find("/") if non_utf8_fname in path)) -- 2.11.0
Pino Toscano
2017-Jun-16 09:43 UTC
Re: [Libguestfs] [PATCH 2/2] python: unicode decode handler error scheme setter
On Sunday, 21 May 2017 18:29:03 CEST Matteo Cafasso wrote:
> The set_decode_error_handler function allows the User to set the > decoding error scheme to be used when non UTF8 characters are > encountered in Python 3.

s/User/user/, and s/UTF8/UTF-8/

> The function has no effect in Python 2. > > Signed-off-by: Matteo Cafasso <noxdafox@gmail.com> > --- > generator/python.ml | 16 ++++++++++++++++ > python/handle.c | 18 ++++++++++++++++-- > python/t/test830RHBZ1406906.py | 6 ++++++ > 3 files changed, 38 insertions(+), 2 deletions(-) > > diff --git a/generator/python.ml b/generator/python.ml > index f7c1f80bb..66bb7f27d 100644 > --- a/generator/python.ml > +++ b/generator/python.ml > @@ -82,6 +82,7 @@ put_handle (guestfs_h *g) > } > > extern void guestfs_int_py_extend_module (PyObject *module); > +extern PyObject *guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args); > > extern PyObject *guestfs_int_py_create (PyObject *self, PyObject *args); > extern PyObject *guestfs_int_py_close (PyObject *self, PyObject *args); > @@ -577,6 +578,8 @@ and generate_python_module () > > (* Table of functions. *) > pr "static PyMethodDef methods[] = {\n"; > + pr " { (char *) \"set_decode_error_handler\", \n"; > + pr " guestfs_int_py_set_decode_error_handler, METH_VARARGS, NULL },\n";

This is implemented as a global for the whole module, which means changing the behaviour for a handle changes it for all the existing handles (and in a racy way, even). 
This IMHO should be a per-handle setting.

> pr " { (char *) \"create\", guestfs_int_py_create, METH_VARARGS, NULL },\n"; > pr " { (char *) \"close\", guestfs_int_py_close, METH_VARARGS, NULL },\n"; > pr " { (char *) \"set_event_callback\",\n"; > @@ -728,6 +731,19 @@ class ClosedHandle(ValueError): > pass > > > +def set_decode_error_handler(handler):

'handler' usually is a function/callback, while in this case it is a behaviour/mode, so I'd use a different name.

> + \"\"\"Set the error handling scheme to use for the handling > + of decoding errors. > + The default is 'strict' meaning that decoding errors raise a > + UnicodeDecodeError. > + > + The other possible value is 'surrogateescape', see PEP383 for reference. > + > + Return the previous error handler. > + \"\"\" > + return libguestfsmod.set_decode_error_handler(handler) > + > + > class GuestFS(object): > \"\"\"Instances of this class are libguestfs API handles.\"\"\" > > diff --git a/python/handle.c b/python/handle.c > index 52c36f1d2..b665bb899 100644 > --- a/python/handle.c > +++ b/python/handle.c > @@ -35,6 +35,8 @@ > > #include "actions.h" > > +static const char *decode_error_handler = "strict"; > + > static PyObject **get_all_event_callbacks (guestfs_h *g, size_t *len_rtn); > > void > @@ -45,6 +47,17 @@ guestfs_int_py_extend_module (PyObject *module) > } > > PyObject * > +guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args) > +{ > + const char *previous_handler = decode_error_handler; > + > + if (!PyArg_ParseTuple (args, (char *) "s:set_decode_error_handler", &decode_error_handler)) > + return NULL;

I really doubt "decode_error_handler" will hold a valid pointer after guestfs_int_py_set_decode_error_handler is done (and the args PyObject is disposed).

-- Pino Toscano
Reasonably Related Threads
- [PATCH 0/2] python: improved UTF8 decoding error handling
- [PATCH v2] RHBZ#1406906: check return value of Python object functions
- [PATCH] python: use constants instead of raw values
- [Bug 1406906] [PATCH 3/3] python: add regression test for RHBZ#1406906
- [PATCH] RHBZ#1406906: check return value of Python object functions