Peter Wu
2014-Aug-08 18:39 UTC
[Libguestfs] [PATCH 1/2] Add type checking, support integers as value
Before this patch, Python would segfault once you pass a non-string key or value to node_set_value. It was also not possible to set bytes on Python 3 as Unicode was assumed (Python 2 was not affected by this). This patch fixes recognition of bytes for Python 3, but in addition it recognizes ints (includes 'long' in Python 2) for DWORD (LE + BE) and QWORDs. For this purpose, a new field was added to the py_set_values struct (words) to simplify memory allocation (otherwise, malloc must be tracked somehow). In addition, more specific exception types have been chosen (TypeError instead of RuntimeError). A test is added to avoid future regressions. Note that this test will crash older versions of hivex. All tests pass with Python 2.7.5 + 3.3.2 (Fedora 20 x86_64), 2.7.8 + 3.4.1 (Arch Linux x86_64) and 2.6.5 (Ubuntu 10.04 i686). --- configure.ac | 4 -- generator/generator.ml | 112 ++++++++++++++++++++++++++++++----------- python/t/300-setvalue-types.py | 76 ++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 32 deletions(-) create mode 100644 python/t/300-setvalue-types.py diff --git a/configure.ac b/configure.ac index f50daff..4566a32 100644 --- a/configure.ac +++ b/configure.ac @@ -345,10 +345,6 @@ AS_IF([test "x$enable_python" != "xno"], [AC_DEFINE([HAVE_PYCAPSULE_NEW],1, [Found PyCapsule_New in libpython])], [],[$PYTHON_BLDLIBRARY]) - AC_CHECK_LIB([c],[PyString_AsString], - [AC_DEFINE([HAVE_PYSTRING_ASSTRING],1, - [Found PyString_AsString in libpython])], - [],[$PYTHON_BLDLIBRARY]) LIBS="$old_LIBS" fi diff --git a/generator/generator.ml b/generator/generator.ml index bcf1966..0aeb24e 100755 --- a/generator/generator.ml +++ b/generator/generator.ml @@ -2837,45 +2837,90 @@ put_handle (hive_h *h) * not be freed. */ static int -get_value (PyObject *v, hive_set_value *ret) +get_value (PyObject *v, hive_set_value *ret, uint64_t *word) { PyObject *obj; -#ifndef HAVE_PYSTRING_ASSTRING PyObject *bytes; -#endif + + if (!PyDict_Check (v)) { + PyErr_SetString (PyExc_TypeError, \"expected dictionary type for value\"); + return -1; + } obj = PyDict_GetItemString (v, \"key\"); if (!obj) { - PyErr_SetString (PyExc_RuntimeError, \"no 'key' element in dictionary\"); + PyErr_SetString (PyExc_KeyError, \"no 'key' element in dictionary\"); + return -1; + } + if (PyUnicode_Check (obj)) { + bytes = PyUnicode_AsUTF8String (obj); + if (bytes == NULL) { + return -1; + } + } else if (PyBytes_Check (obj)) { + bytes = obj; + } else { + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'key'\"); return -1; } -#ifdef HAVE_PYSTRING_ASSTRING - ret->key = PyString_AsString (obj); -#else - bytes = PyUnicode_AsUTF8String (obj); ret->key = PyBytes_AS_STRING (bytes); -#endif obj = PyDict_GetItemString (v, \"t\"); if (!obj) { - PyErr_SetString (PyExc_RuntimeError, \"no 't' element in dictionary\"); + PyErr_SetString (PyExc_KeyError, \"no 't' element in dictionary\"); return -1; } ret->t = PyLong_AsLong (obj); + if (PyErr_Occurred ()) { + PyErr_SetString (PyExc_TypeError, \"expected int type for 't'\"); + return -1; + } obj = PyDict_GetItemString (v, \"value\"); if (!obj) { - PyErr_SetString (PyExc_RuntimeError, \"no 'value' element in dictionary\"); + PyErr_SetString (PyExc_KeyError, \"no 'value' element in dictionary\"); + return -1; + } + /* Support bytes and unicode strings. For DWORD and QWORD types, long and + * integers are also supported. Caller must ensure sanity of byte buffer + * lengths */ + if (PyUnicode_Check (obj)) { + bytes = PyUnicode_AsUTF8String (obj); + if (bytes == NULL) + return -1; + ret->len = PyBytes_GET_SIZE (bytes); + ret->value = PyBytes_AS_STRING (bytes); + } else if (PyBytes_Check (obj)) { + ret->len = PyBytes_GET_SIZE (obj); + ret->value = PyBytes_AS_STRING (obj); + } else if (ret->t == hive_t_REG_DWORD || + ret->t == hive_t_REG_DWORD_BIG_ENDIAN) { + uint32_t d = PyLong_AsLong (obj); + if (PyErr_Occurred ()) { + PyErr_SetString (PyExc_TypeError, \"expected int type for DWORD value\"); + return -1; + } + + ret->len = sizeof (d); + ret->value = (char *) word; + if (ret->t == hive_t_REG_DWORD) + *(uint32_t *) ret->value = htole32 (d); + else + *(uint32_t *) ret->value = htobe32 (d); + } else if (ret->t == hive_t_REG_QWORD) { + uint64_t l = PyLong_AsLongLong (obj); + if (PyErr_Occurred ()) { + PyErr_SetString (PyExc_TypeError, \"expected int type for QWORD value\"); + return -1; + } + + ret->len = sizeof (l); + ret->value = (char *) word; + *(uint64_t *) ret->value = htole64 (l); + } else { + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'value'\"); return -1; } -#ifdef HAVE_PYSTRING_ASSTRING - ret->value = PyString_AsString (obj); - ret->len = PyString_Size (obj); -#else - bytes = PyUnicode_AsUTF8String (obj); - ret->value = PyBytes_AS_STRING (bytes); - ret->len = PyBytes_GET_SIZE (bytes); -#endif return 0; } @@ -2883,6 +2928,7 @@ get_value (PyObject *v, hive_set_value *ret) typedef struct py_set_values { size_t nr_values; hive_set_value *values; + uint64_t *words; } py_set_values; static int @@ -2905,13 +2951,21 @@ get_values (PyObject *v, py_set_values *ret) ret->nr_values = len; ret->values = malloc (len * sizeof (hive_set_value)); if (!ret->values) { - PyErr_SetString (PyExc_RuntimeError, strerror (errno)); + PyErr_NoMemory (); + return -1; + } + /* if the value is a dword/qword, it will be stored here */ + ret->words = malloc (len * sizeof (uint64_t)); + if (!ret->words) { + free (ret->values); + PyErr_NoMemory (); return -1; } for (i = 0; i < len; ++i) { - if (get_value (PyList_GetItem (v, i), &(ret->values[i])) == -1) { + if (get_value (PyList_GetItem (v, i), &(ret->values[i]), &(ret->words[i])) == -1) { free (ret->values); + free (ret->words); return -1; } } @@ -3070,9 +3124,10 @@ put_val_type (char *val, size_t len, hive_type t) | ASetValues -> pr " py_set_values values;\n"; pr " PyObject *py_values;\n" - | ASetValue -> - pr " hive_set_value val;\n"; - pr " PyObject *py_val;\n" + | ASetValue -> + pr " hive_set_value val;\n"; + pr " PyObject *py_val;\n"; + pr " uint64_t word;" ) (snd style); pr "\n"; @@ -3136,8 +3191,8 @@ put_val_type (char *val, size_t len, hive_type t) | ASetValues -> pr " if (get_values (py_values, &values) == -1)\n"; pr " return NULL;\n" - | ASetValue -> - pr " if (get_value (py_val, &val) == -1)\n"; + | ASetValue -> + pr " if (get_value (py_val, &val, &word) == -1)\n"; pr " return NULL;\n" ) (snd style); @@ -3151,8 +3206,9 @@ put_val_type (char *val, size_t len, hive_type t) | AString _ | AStringNullable _ | AOpenFlags | AUnusedFlags -> () | ASetValues -> - pr " free (values.values);\n" - | ASetValue -> () + pr " free (values.values);\n"; + pr " free (values.words);\n" + | ASetValue -> () ) (snd style); (* Check for errors from C library. *) diff --git a/python/t/300-setvalue-types.py b/python/t/300-setvalue-types.py new file mode 100644 index 0000000..6d72a59 --- /dev/null +++ b/python/t/300-setvalue-types.py @@ -0,0 +1,76 @@ +# Test various possible types for assignment to setvalue +# Copyright (C) 2014 Peter Wu <peter@lekensteyn.nl> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +import hivex +import os +from sys import getrefcount + +srcdir = "." +if "srcdir" in os.environ and os.environ["srcdir"]: + srcdir = os.environ["srcdir"] + +h = hivex.Hivex ("%s/../images/minimal" % srcdir, + write = True) + +REG_SZ = 1 +REG_BINARY = 3 +REG_DWORD = 4 +REG_DWORD_BIG_ENDIAN = 5 +REG_QWORD = 11 + +def set_value (key="test key", t=REG_BINARY, value=b'Val'): + global h + h.node_set_value (h.root (), { + "key": key, + "t": t, + "value": value + }) + +def test_pass (t, value, exp_bytes): + global h + key = "test_key" + set_value (key, t, value) + val = h.node_get_value (h.root (), key) + ret_type, ret_value = h.value_value (val) + assert t == ret_type, \ + "expected type {0}, got {1}".format(t, ret_type) + assert exp_bytes == ret_value, \ + "expected value {0}, got {1}".format(exp_bytes, ret_value) + +def test_exception (exception_type, **kwargs): + try: + set_value (**kwargs) + raise AssertionError("expected {0}".format(exception_type)) + except exception_type: + pass + +# Good weather tests +test_pass (REG_BINARY, b"\x01\x02", b"\x01\x02") +test_pass (REG_SZ, u"Val", b"\x56\x61\x6c") +test_pass (REG_DWORD, 0xabcdef, b'\xef\xcd\xab\x00') +test_pass (REG_DWORD_BIG_ENDIAN, 0xabcdef, b'\x00\xab\xcd\xef') +test_pass (REG_QWORD, 0x0123456789abcdef, + b'\xef\xcd\xab\x89\x67\x45\x23\x01') + +# *WORDs must still accept bytes (the length is not checked) +test_pass (REG_DWORD, b'\xaa\xbb\xcc', b'\xaa\xbb\xcc') + +# Bad weather tests +test_exception (TypeError, key = 1) +test_exception (TypeError, t = "meh") +test_exception (TypeError, t = REG_SZ, value = 1) +test_exception (TypeError, t = REG_DWORD, value = None) -- 2.0.2
Richard W.M. Jones
2014-Aug-12 09:45 UTC
Re: [Libguestfs] [PATCH 1/2] Add type checking, support integers as value
On Fri, Aug 08, 2014 at 08:39:41PM +0200, Peter Wu wrote: [...] In general the problem is this is a mega-patch which fixes and changes lots of different things. So please split it up. More comments below.> diff --git a/generator/generator.ml b/generator/generator.ml > index bcf1966..0aeb24e 100755 > --- a/generator/generator.ml > +++ b/generator/generator.ml > @@ -2837,45 +2837,90 @@ put_handle (hive_h *h) > * not be freed. > */ > static int > -get_value (PyObject *v, hive_set_value *ret) > +get_value (PyObject *v, hive_set_value *ret, uint64_t *word) > { > PyObject *obj; > -#ifndef HAVE_PYSTRING_ASSTRING > PyObject *bytes; > -#endif > + > + if (!PyDict_Check (v)) { > + PyErr_SetString (PyExc_TypeError, \"expected dictionary type for value\"); > + return -1; > + }This on its own is fine, and an example of a single change which should go in a separate patch.> obj = PyDict_GetItemString (v, \"key\"); > if (!obj) { > - PyErr_SetString (PyExc_RuntimeError, \"no 'key' element in dictionary\"); > + PyErr_SetString (PyExc_KeyError, \"no 'key' element in dictionary\");This is an unrelated change. I don't really know if it's a good or bad change, but it needs to be reviewed in a separate patch, along with other identical changes to the exception type below.> + return -1; > + } > + if (PyUnicode_Check (obj)) { > + bytes = PyUnicode_AsUTF8String (obj); > + if (bytes == NULL) { > + return -1;Don't we need to set PyErr_SetString here?> + } > + } else if (PyBytes_Check (obj)) { > + bytes = obj; > + } else { > + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'key'\"); > return -1; > } > -#ifdef HAVE_PYSTRING_ASSTRING > - ret->key = PyString_AsString (obj); > -#else > - bytes = PyUnicode_AsUTF8String (obj); > ret->key = PyBytes_AS_STRING (bytes); > -#endif > > obj = PyDict_GetItemString (v, \"t\"); > if (!obj) { > - PyErr_SetString (PyExc_RuntimeError, \"no 't' element in dictionary\"); > + PyErr_SetString (PyExc_KeyError, \"no 't' element in dictionary\"); > return -1; > } > ret->t = PyLong_AsLong (obj); > + if (PyErr_Occurred ()) { > + PyErr_SetString (PyExc_TypeError, \"expected int type for 't'\"); > + return -1; > + } > > obj = PyDict_GetItemString (v, \"value\"); > if (!obj) { > - PyErr_SetString (PyExc_RuntimeError, \"no 'value' element in dictionary\"); > + PyErr_SetString (PyExc_KeyError, \"no 'value' element in dictionary\"); > + return -1; > + } > + /* Support bytes and unicode strings. For DWORD and QWORD types, long and > + * integers are also supported. Caller must ensure sanity of byte buffer > + * lengths */ > + if (PyUnicode_Check (obj)) { > + bytes = PyUnicode_AsUTF8String (obj); > + if (bytes == NULL) > + return -1; > + ret->len = PyBytes_GET_SIZE (bytes); > + ret->value = PyBytes_AS_STRING (bytes); > + } else if (PyBytes_Check (obj)) { > + ret->len = PyBytes_GET_SIZE (obj); > + ret->value = PyBytes_AS_STRING (obj); > + } else if (ret->t == hive_t_REG_DWORD || > + ret->t == hive_t_REG_DWORD_BIG_ENDIAN) { > + uint32_t d = PyLong_AsLong (obj); > + if (PyErr_Occurred ()) { > + PyErr_SetString (PyExc_TypeError, \"expected int type for DWORD value\"); > + return -1; > + }This is where things start to go wrong. There's no way you can trust the type field (even though it comes from the caller, not the registry in this case). The type field is only a hint, it doesn't reflect the actual type stored. TBH I'm not sure why you're using PyLong_AsLong{,Long} here at all.> + ret->len = sizeof (d); > + ret->value = (char *) word; > + if (ret->t == hive_t_REG_DWORD) > + *(uint32_t *) ret->value = htole32 (d); > + else > + *(uint32_t *) ret->value = htobe32 (d); > + } else if (ret->t == hive_t_REG_QWORD) { > + uint64_t l = PyLong_AsLongLong (obj); > + if (PyErr_Occurred ()) { > + PyErr_SetString (PyExc_TypeError, \"expected int type for QWORD value\"); > + return -1; > + } > + > + ret->len = sizeof (l); > + ret->value = (char *) word; > + *(uint64_t *) ret->value = htole64 (l); > + } else { > + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'value'\"); > return -1; > } > -#ifdef HAVE_PYSTRING_ASSTRING > - ret->value = PyString_AsString (obj); > - ret->len = PyString_Size (obj); > -#else > - bytes = PyUnicode_AsUTF8String (obj); > - ret->value = PyBytes_AS_STRING (bytes); > - ret->len = PyBytes_GET_SIZE (bytes); > -#endif > > return 0; > } > @@ -2883,6 +2928,7 @@ get_value (PyObject *v, hive_set_value *ret) > typedef struct py_set_values { > size_t nr_values; > hive_set_value *values; > + uint64_t *words; > } py_set_values; > > static int > @@ -2905,13 +2951,21 @@ get_values (PyObject *v, py_set_values *ret) > ret->nr_values = len; > ret->values = malloc (len * sizeof (hive_set_value)); > if (!ret->values) { > - PyErr_SetString (PyExc_RuntimeError, strerror (errno)); > + PyErr_NoMemory (); > + return -1; > + } > + /* if the value is a dword/qword, it will be stored here */ > + ret->words = malloc (len * sizeof (uint64_t)); > + if (!ret->words) { > + free (ret->values); > + PyErr_NoMemory (); > return -1; > } > > for (i = 0; i < len; ++i) { > - if (get_value (PyList_GetItem (v, i), &(ret->values[i])) == -1) { > + if (get_value (PyList_GetItem (v, i), &(ret->values[i]), &(ret->words[i])) == -1) { > free (ret->values); > + free (ret->words); > return -1; > } > } > @@ -3070,9 +3124,10 @@ put_val_type (char *val, size_t len, hive_type t) > | ASetValues -> > pr " py_set_values values;\n"; > pr " PyObject *py_values;\n" > - | ASetValue -> > - pr " hive_set_value val;\n"; > - pr " PyObject *py_val;\n" > + | ASetValue -> > + pr " hive_set_value val;\n"; > + pr " PyObject *py_val;\n"; > + pr " uint64_t word;" > ) (snd style); > > pr "\n"; > @@ -3136,8 +3191,8 @@ put_val_type (char *val, size_t len, hive_type t) > | ASetValues -> > pr " if (get_values (py_values, &values) == -1)\n"; > pr " return NULL;\n" > - | ASetValue -> > - pr " if (get_value (py_val, &val) == -1)\n"; > + | ASetValue -> > + pr " if (get_value (py_val, &val, &word) == -1)\n"; > pr " return NULL;\n" > ) (snd style); > > @@ -3151,8 +3206,9 @@ put_val_type (char *val, size_t len, hive_type t) > | AString _ | AStringNullable _ > | AOpenFlags | AUnusedFlags -> () > | ASetValues -> > - pr " free (values.values);\n" > - | ASetValue -> () > + pr " free (values.values);\n"; > + pr " free (values.words);\n" > + | ASetValue -> () > ) (snd style); > > (* Check for errors from C library. *) > diff --git a/python/t/300-setvalue-types.py b/python/t/300-setvalue-types.py > new file mode 100644 > index 0000000..6d72a59 > --- /dev/null > +++ b/python/t/300-setvalue-types.py > @@ -0,0 +1,76 @@ > +# Test various possible types for assignment to setvalue > +# Copyright (C) 2014 Peter Wu <peter@lekensteyn.nl> > +# > +# This program is free software; you can redistribute it and/or modify > +# it under the terms of the GNU General Public License as published by > +# the Free Software Foundation; either version 2 of the License, or > +# (at your option) any later version. > +# > +# This program is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with this program; if not, write to the Free Software > +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > + > +import hivex > +import os > +from sys import getrefcount > + > +srcdir = "." > +if "srcdir" in os.environ and os.environ["srcdir"]: > + srcdir = os.environ["srcdir"] > + > +h = hivex.Hivex ("%s/../images/minimal" % srcdir, > + write = True) > + > +REG_SZ = 1 > +REG_BINARY = 3 > +REG_DWORD = 4 > +REG_DWORD_BIG_ENDIAN = 5 > +REG_QWORD = 11 > + > +def set_value (key="test key", t=REG_BINARY, value=b'Val'): > + global h > + h.node_set_value (h.root (), { > + "key": key, > + "t": t, > + "value": value > + }) > + > +def test_pass (t, value, exp_bytes): > + global h > + key = "test_key" > + set_value (key, t, value) > + val = h.node_get_value (h.root (), key) > + ret_type, ret_value = h.value_value (val) > + assert t == ret_type, \ > + "expected type {0}, got {1}".format(t, ret_type) > + assert exp_bytes == ret_value, \ > + "expected value {0}, got {1}".format(exp_bytes, ret_value) > + > +def test_exception (exception_type, **kwargs): > + try: > + set_value (**kwargs) > + raise AssertionError("expected {0}".format(exception_type)) > + except exception_type: > + pass > + > +# Good weather tests > +test_pass (REG_BINARY, b"\x01\x02", b"\x01\x02") > +test_pass (REG_SZ, u"Val", b"\x56\x61\x6c") > +test_pass (REG_DWORD, 0xabcdef, b'\xef\xcd\xab\x00') > +test_pass (REG_DWORD_BIG_ENDIAN, 0xabcdef, b'\x00\xab\xcd\xef') > +test_pass (REG_QWORD, 0x0123456789abcdef, > + b'\xef\xcd\xab\x89\x67\x45\x23\x01') > + > +# *WORDs must still accept bytes (the length is not checked) > +test_pass (REG_DWORD, b'\xaa\xbb\xcc', b'\xaa\xbb\xcc') > + > +# Bad weather tests > +test_exception (TypeError, key = 1) > +test_exception (TypeError, t = "meh") > +test_exception (TypeError, t = REG_SZ, value = 1) > +test_exception (TypeError, t = REG_DWORD, value = None)Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Peter Wu
2014-Aug-12 10:39 UTC
Re: [Libguestfs] [PATCH 1/2] Add type checking, support integers as value
On Tuesday 12 August 2014 10:45:07 Richard W.M. Jones wrote: [..]> > diff --git a/generator/generator.ml b/generator/generator.ml > > index bcf1966..0aeb24e 100755 > > --- a/generator/generator.ml > > +++ b/generator/generator.ml > > @@ -2837,45 +2837,90 @@ put_handle (hive_h *h) > > * not be freed. > > */ > > static int > > -get_value (PyObject *v, hive_set_value *ret) > > +get_value (PyObject *v, hive_set_value *ret, uint64_t *word) > > { > > PyObject *obj; > > -#ifndef HAVE_PYSTRING_ASSTRING > > PyObject *bytes; > > -#endif > > + > > + if (!PyDict_Check (v)) { > > + PyErr_SetString (PyExc_TypeError, \"expected dictionary type for value\"); > > + return -1; > > + } > > This on its own is fine, and an example of a single change which > should go in a separate patch.OK, one patch for type checking of the key and type.> > obj = PyDict_GetItemString (v, \"key\"); > > if (!obj) { > > - PyErr_SetString (PyExc_RuntimeError, \"no 'key' element in dictionary\"); > > + PyErr_SetString (PyExc_KeyError, \"no 'key' element in dictionary\"); > > This is an unrelated change. I don't really know if it's a good or > bad change, but it needs to be reviewed in a separate patch, along > with other identical changes to the exception type below.OK, another patch for changing types. RuntimeError is too generic for my taste, this better captures the actual meaning of the error.> > + return -1; > > + } > > + if (PyUnicode_Check (obj)) { > > + bytes = PyUnicode_AsUTF8String (obj); > > + if (bytes == NULL) { > > + return -1; > > Don't we need to set PyErr_SetString here?PyUnicode_AsUTF8String should set the error code when it fails. I have not triggered that case yet, but maybe it is better to be more explicit and mention the source of the error.> > + } > > + } else if (PyBytes_Check (obj)) { > > + bytes = obj; > > + } else { > > + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'key'\"); > > return -1; > > } > > -#ifdef HAVE_PYSTRING_ASSTRING > > - ret->key = PyString_AsString (obj); > > -#else > > - bytes = PyUnicode_AsUTF8String (obj); > > ret->key = PyBytes_AS_STRING (bytes); > > -#endif > > > > obj = PyDict_GetItemString (v, \"t\"); > > if (!obj) { > > - PyErr_SetString (PyExc_RuntimeError, \"no 't' element in dictionary\"); > > + PyErr_SetString (PyExc_KeyError, \"no 't' element in dictionary\"); > > return -1; > > } > > ret->t = PyLong_AsLong (obj); > > + if (PyErr_Occurred ()) { > > + PyErr_SetString (PyExc_TypeError, \"expected int type for 't'\"); > > + return -1; > > + } > > > > obj = PyDict_GetItemString (v, \"value\"); > > if (!obj) { > > - PyErr_SetString (PyExc_RuntimeError, \"no 'value' element in dictionary\"); > > + PyErr_SetString (PyExc_KeyError, \"no 'value' element in dictionary\"); > > + return -1; > > + } > > + /* Support bytes and unicode strings. For DWORD and QWORD types, long and > > + * integers are also supported. Caller must ensure sanity of byte buffer > > + * lengths */ > > + if (PyUnicode_Check (obj)) { > > + bytes = PyUnicode_AsUTF8String (obj); > > + if (bytes == NULL) > > + return -1; > > + ret->len = PyBytes_GET_SIZE (bytes); > > + ret->value = PyBytes_AS_STRING (bytes);I think we should outright reject Unicode strings. (u"" in Python 2 + 3, "" in Python 3.) The previous Python 3 code and the one in this patch tries to be too smart and assumes that the value can be a non-NUL terminated UTF-8 string.> > + } else if (PyBytes_Check (obj)) { > > + ret->len = PyBytes_GET_SIZE (obj); > > + ret->value = PyBytes_AS_STRING (obj); > > + } else if (ret->t == hive_t_REG_DWORD || > > + ret->t == hive_t_REG_DWORD_BIG_ENDIAN) { > > + uint32_t d = PyLong_AsLong (obj); > > + if (PyErr_Occurred ()) { > > + PyErr_SetString (PyExc_TypeError, \"expected int type for DWORD value\"); > > + return -1; > > + } > > This is where things start to go wrong. There's no way you can trust > the type field (even though it comes from the caller, not the registry > in this case). The type field is only a hint, it doesn't reflect the > actual type stored.Indeed, type is a hint, but if you really insist in pushing a QWORD in a DWORD, you can still use the byte type. In C, you cannot detect the type from a pointer, but this is Python where it is possible so this is why I added it for convenience. To make this API better follow the C API, what about dropping support for everything but bytes? If conversion from UTF-8 to nul-terminated UTF-16 is wanted, or conversion from an int to a DWORD, a second API could be created instead (that just uses the thin C <-> Python bindings).> TBH I'm not sure why you're using PyLong_AsLong{,Long} here at all.To convert a PyObject to a long.> > + ret->len = sizeof (d); > > + ret->value = (char *) word; > > + if (ret->t == hive_t_REG_DWORD) > > + *(uint32_t *) ret->value = htole32 (d); > > + else > > + *(uint32_t *) ret->value = htobe32 (d); > > + } else if (ret->t == hive_t_REG_QWORD) { > > + uint64_t l = PyLong_AsLongLong (obj); > > + if (PyErr_Occurred ()) { > > + PyErr_SetString (PyExc_TypeError, \"expected int type for QWORD value\"); > > + return -1; > > + } > > + > > + ret->len = sizeof (l); > > + ret->value = (char *) word; > > + *(uint64_t *) ret->value = htole64 (l); > > + } else { > > + PyErr_SetString (PyExc_TypeError, \"expected bytes or str type for 'value'\"); > > return -1; > > } > > -#ifdef HAVE_PYSTRING_ASSTRING > > - ret->value = PyString_AsString (obj); > > - ret->len = PyString_Size (obj); > > -#else > > - bytes = PyUnicode_AsUTF8String (obj); > > - ret->value = PyBytes_AS_STRING (bytes); > > - ret->len = PyBytes_GET_SIZE (bytes); > > -#endif > > > > return 0; > > } > > @@ -2883,6 +2928,7 @@ get_value (PyObject *v, hive_set_value *ret) > > typedef struct py_set_values { > > size_t nr_values; > > hive_set_value *values; > > + uint64_t *words; > > } py_set_values; > > > > static int > > @@ -2905,13 +2951,21 @@ get_values (PyObject *v, py_set_values *ret) > > ret->nr_values = len; > > ret->values = malloc (len * sizeof (hive_set_value)); > > if (!ret->values) { > > - PyErr_SetString (PyExc_RuntimeError, strerror (errno)); > > + PyErr_NoMemory (); > > + return -1; > > + } > > + /* if the value is a dword/qword, it will be stored here */ > > + ret->words = malloc (len * sizeof (uint64_t)); > > + if (!ret->words) { > > + free (ret->values); > > + PyErr_NoMemory (); > > return -1; > > } > > > > for (i = 0; i < len; ++i) { > > - if (get_value (PyList_GetItem (v, i), &(ret->values[i])) == -1) { > > + if (get_value (PyList_GetItem (v, i), &(ret->values[i]), &(ret->words[i])) == -1) { > > free (ret->values); > > + free (ret->words); > > return -1; > > } > > } > > @@ -3070,9 +3124,10 @@ put_val_type (char *val, size_t len, hive_type t) > > | ASetValues -> > > pr " py_set_values values;\n"; > > pr " PyObject *py_values;\n" > > - | ASetValue -> > > - pr " hive_set_value val;\n"; > > - pr " PyObject *py_val;\n" > > + | ASetValue -> > > + pr " hive_set_value val;\n"; > > + pr " PyObject *py_val;\n"; > > + pr " uint64_t word;" > > ) (snd style); > > > > pr "\n"; > > @@ -3136,8 +3191,8 @@ put_val_type (char *val, size_t len, hive_type t) > > | ASetValues -> > > pr " if (get_values (py_values, &values) == -1)\n"; > > pr " return NULL;\n" > > - | ASetValue -> > > - pr " if (get_value (py_val, &val) == -1)\n"; > > + | ASetValue -> > > + pr " if (get_value (py_val, &val, &word) == -1)\n"; > > pr " return NULL;\n" > > ) (snd style); > > > > @@ -3151,8 +3206,9 @@ put_val_type (char *val, size_t len, hive_type t) > > | AString _ | AStringNullable _ > > | AOpenFlags | AUnusedFlags -> () > > | ASetValues -> > > - pr " free (values.values);\n" > > - | ASetValue -> () > > + pr " free (values.values);\n"; > > + pr " free (values.words);\n" > > + | ASetValue -> () > > ) (snd style); > > > > (* Check for errors from C library. *) > > diff --git a/python/t/300-setvalue-types.py b/python/t/300-setvalue-types.py > > new file mode 100644 > > index 0000000..6d72a59 > > --- /dev/null > > +++ b/python/t/300-setvalue-types.py > > @@ -0,0 +1,76 @@ > > +# Test various possible types for assignment to setvalue > > +# Copyright (C) 2014 Peter Wu <peter@lekensteyn.nl> > > +# > > +# This program is free software; you can redistribute it and/or modify > > +# it under the terms of the GNU General Public License as published by > > +# the Free Software Foundation; either version 2 of the License, or > > +# (at your option) any later version. > > +# > > +# This program is distributed in the hope that it will be useful, > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > +# GNU General Public License for more details. > > +# > > +# You should have received a copy of the GNU General Public License > > +# along with this program; if not, write to the Free Software > > +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > > + > > +import hivex > > +import os > > +from sys import getrefcount > > + > > +srcdir = "." > > +if "srcdir" in os.environ and os.environ["srcdir"]: > > + srcdir = os.environ["srcdir"] > > + > > +h = hivex.Hivex ("%s/../images/minimal" % srcdir, > > + write = True) > > + > > +REG_SZ = 1 > > +REG_BINARY = 3 > > +REG_DWORD = 4 > > +REG_DWORD_BIG_ENDIAN = 5 > > +REG_QWORD = 11 > > + > > +def set_value (key="test key", t=REG_BINARY, value=b'Val'): > > + global h > > + h.node_set_value (h.root (), { > > + "key": key, > > + "t": t, > > + "value": value > > + }) > > + > > +def test_pass (t, value, exp_bytes): > > + global h > > + key = "test_key" > > + set_value (key, t, value) > > + val = h.node_get_value (h.root (), key) > > + ret_type, ret_value = h.value_value (val) > > + assert t == ret_type, \ > > + "expected type {0}, got {1}".format(t, ret_type) > > + assert exp_bytes == ret_value, \ > > + "expected value {0}, got {1}".format(exp_bytes, ret_value) > > + > > +def test_exception (exception_type, **kwargs): > > + try: > > + set_value (**kwargs) > > + raise AssertionError("expected {0}".format(exception_type)) > > + except exception_type: > > + pass > > + > > +# Good weather tests > > +test_pass (REG_BINARY, b"\x01\x02", b"\x01\x02") > > +test_pass (REG_SZ, u"Val", b"\x56\x61\x6c") > > +test_pass (REG_DWORD, 0xabcdef, b'\xef\xcd\xab\x00') > > +test_pass (REG_DWORD_BIG_ENDIAN, 0xabcdef, b'\x00\xab\xcd\xef') > > +test_pass (REG_QWORD, 0x0123456789abcdef, > > + b'\xef\xcd\xab\x89\x67\x45\x23\x01') > > + > > +# *WORDs must still accept bytes (the length is not checked) > > +test_pass (REG_DWORD, b'\xaa\xbb\xcc', b'\xaa\xbb\xcc') > > + > > +# Bad weather tests > > +test_exception (TypeError, key = 1) > > +test_exception (TypeError, t = "meh") > > +test_exception (TypeError, t = REG_SZ, value = 1) > > +test_exception (TypeError, t = REG_DWORD, value = None)I will come later with follow-up patches, thanks for your review! -- Kind regards, Peter https://lekensteyn.nl