Hilko Bengen
2014-Jan-08 00:26 UTC
[Libguestfs] hivex: Make node names and value names with embedded null characters accessible
On Windows, there exist at least two APIs for dealing with the Registry: The Win32 API (RegCreateKeyA, RegCreateKeyW, etc.) works with null-terminated ASCII or UTF-16 strings. The native API (ZwCreateKey, etc.), on the other hand, works with UTF-16 strings that are stored as buffers+length and may contain null characters. Malware authors have been relying on the Win32 API's inability to properly work with such names for several years. These changes make such names accessible from hivex.
Hilko Bengen
2014-Jan-08 00:26 UTC
[Libguestfs] [PATCH 1/3] lib: Add internal function to calculate strlen for strings encoded in Latin1 or UTF-16LE
--- lib/hivex-internal.h | 1 + lib/utf16.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h index 6bc8638..7f4cc3c 100644 --- a/lib/hivex-internal.h +++ b/lib/hivex-internal.h @@ -277,6 +277,7 @@ extern char * _hivex_recode (const char *input_encoding, _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16); extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); +extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16); /* util.c */ extern void _hivex_free_strings (char **argv); diff --git a/lib/utf16.c b/lib/utf16.c index 3641580..7dde9e5 100644 --- a/lib/utf16.c +++ b/lib/utf16.c @@ -123,3 +123,13 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len) return ret; } + +size_t +_hivex_utf8_strlen (const char* str, size_t len, int utf16) +{ + char* encoding = utf16 ? "UTF-16LE" : "LATIN1"; + size_t ret; + char* buf = _hivex_recode(encoding, str, len, "UTF-8", &ret); + free(buf); + return ret; +} -- 1.8.5.2
Hilko Bengen
2014-Jan-08 00:26 UTC
[Libguestfs] [PATCH 2/3] lib: Mind character encoding in value_key_len, add matching node_name_len function
Since iconv() does not treat null characters as special, this makes it possible to read node names and value keys with embedded null characters, which are not displayed at all by Windows Regedit. --- generator/generator.ml | 19 ++++++++++++++----- lib/node.c | 24 ++++++++++++++++++++++++ lib/value.c | 9 +++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/generator/generator.ml b/generator/generator.ml index 54e5f0f..1c62129 100755 --- a/generator/generator.ml +++ b/generator/generator.ml @@ -179,7 +179,14 @@ C<$$$PROTO.HIV> (other names are possible: it seems to depend on the tool or program that created the hive in the first place). You can only know the \"real\" name of the root node by knowing which registry file this hive originally comes from, which is knowledge that is -outside the scope of this library."; +outside the scope of this library. + +The name is recoded to UTF-8 and may contain embedded NUL characters."; + + "node_name_len", (RSize, [AHive; ANode "node"]), + "return the length of a node's name", + "\ +Return the length of the node name as produced by C<hivex_node_name>."; "node_timestamp", (RInt64, [AHive; ANode "node"]), "return the modification time of the node", @@ -233,13 +240,15 @@ default key."; "value_key_len", (RSize, [AHive; AValue "val"]), "return the length of a value's key", "\ -Return the length of the key (name) of a (key, value) pair. The -length can legitimately be 0, so errno is the necesary mechanism -to check for errors. +Return the length of the key (name) of a (key, value) pair as produced +by C<hivex_value_key>. The length can legitimately be 0, so errno is +the necesary mechanism to check for errors. In the context of Windows Registries, a zero-length name means that this value is the default key for this node in the tree. -This is usually written as C<\"@\">. 
+ +The key is recoded to UTF-8 and may contain embedded NUL characters."; "value_key", (RString, [AHive; AValue "val"]), "return the key of a (key, value) pair", diff --git a/lib/node.c b/lib/node.c index fcd7442..22d1861 100644 --- a/lib/node.c +++ b/lib/node.c @@ -96,6 +96,30 @@ hivex_node_name (hive_h *h, hive_node_h node) } } +size_t +hivex_node_name_len (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + SET_ERRNO (EINVAL, "invalid block or not an 'nk' block"); + return 0; + } + struct ntreg_nk_record *nk = + (struct ntreg_nk_record *) ((char *) h->addr + node); + + /* nk->name_len is unsigned, 16 bit, so this is safe ... However + * we have to make sure the length doesn't exceed the block length. + */ + size_t len = le16toh (nk->name_len); + size_t seg_len = block_len (h, node, NULL); + if (sizeof (struct ntreg_nk_record) + len - 1 > seg_len) { + SET_ERRNO (EFAULT, "node name is too long (%zu, %zu)", len, seg_len); + return 0; + } + + return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20)); +} + + static int64_t timestamp_check (hive_h *h, hive_node_h node, int64_t timestamp) { diff --git a/lib/value.c b/lib/value.c index 3460a8c..65404d7 100644 --- a/lib/value.c +++ b/lib/value.c @@ -186,13 +186,14 @@ hivex_value_key_len (hive_h *h, hive_value_h value) /* vk->name_len is unsigned, 16 bit, so this is safe ... However * we have to make sure the length doesn't exceed the block length. */ - size_t ret = le16toh (vk->name_len); + size_t len = le16toh (vk->name_len); + size_t seg_len = block_len (h, value, NULL); - if (sizeof (struct ntreg_vk_record) + ret - 1 > seg_len) { - SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", ret, seg_len); + if (sizeof (struct ntreg_vk_record) + len - 1 > seg_len) { + SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len); return 0; } - return ret; + return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01)); } char * -- 1.8.5.2
Hilko Bengen
2014-Jan-08 00:26 UTC
[Libguestfs] [PATCH 3/3] generator: use node_name_len, value_key_len API in bindings
--- generator/generator.ml | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/generator/generator.ml b/generator/generator.ml index 1c62129..6b1eef7 100755 --- a/generator/generator.ml +++ b/generator/generator.ml @@ -380,6 +380,11 @@ new key is added. Key matching is case insensitive. C<node> is the node to modify."; ] +let f_len_exists n = + List.exists + (function (cand, _, _, _) -> cand = (String.concat "" [n; "_len"])) + functions + (* Useful functions. * Note we don't want to use any external OCaml libraries which * makes this a bit harder than it should be. @@ -1839,9 +1844,7 @@ static void raise_closed (const char *) Noreturn; * call, so don't do it. XXX *) (*pr " caml_enter_blocking_section ();\n";*) - pr " r = hivex_%s (%s" name (List.hd c_params); - List.iter (pr ", %s") (List.tl c_params); - pr ");\n"; + pr " r = hivex_%s (%s);\n" name (String.concat ", " c_params); (*pr " caml_leave_blocking_section ();\n";*) pr "\n"; @@ -1890,7 +1893,13 @@ static void raise_closed (const char *) Noreturn; pr " rv = copy_int_array (r);\n"; pr " free (r);\n" | RString -> - pr " rv = caml_copy_string (r);\n"; + if f_len_exists name then ( + pr " size_t sz;\n sz = hivex_%s_len (%s);\n" + name (String.concat ", " c_params); + pr " rv = caml_alloc_string (sz);\n"; + pr " memcpy (String_val (rv), r, sz);\n" + ) else + pr " rv = caml_copy_string (r);\n"; pr " free (r);\n" | RStringList -> pr " rv = caml_copy_string_array ((const char **) r);\n"; @@ -2638,7 +2647,11 @@ DESTROY (h) pr " if (r == NULL)\n"; pr " croak (\"%%s: %%s\", \"%s\", strerror (errno));\n" name; - pr " RETVAL = newSVpv (r, 0);\n"; + if f_len_exists name then + pr " RETVAL = newSVpvn (r, hivex_%s_len (%s));\n" + name (String.concat ", " c_params) + else + pr " RETVAL = newSVpv (r, 0);\n"; pr " free (r);\n"; pr " OUTPUT:\n"; pr " RETVAL\n" @@ -3178,10 +3191,19 @@ put_val_type (char *val, size_t len, hive_type t) | RValue -> pr " py_r = 
PyLong_FromLongLong (r);\n" | RString -> + if f_len_exists name then + pr " size_t sz = hivex_%s_len (%s);\n" + name (String.concat ", " c_params); pr "#ifdef HAVE_PYSTRING_ASSTRING\n"; - pr " py_r = PyString_FromString (r);\n"; + if f_len_exists name then + pr " py_r = PyString_FromStringAndSize (r, sz);\n" + else + pr " py_r = PyString_FromString (r);\n"; pr "#else\n"; - pr " py_r = PyUnicode_FromString (r);\n"; + if f_len_exists name then + pr " py_r = PyUnicode_FromStringAndSize (r, sz);\n" + else + pr " py_r = PyUnicode_FromString (r);\n"; pr "#endif\n"; pr " free (r);" | RStringList -> @@ -3633,7 +3655,11 @@ get_values (VALUE valuesv, size_t *nr_values) pr " free (r);\n"; pr " return rv;\n" | RString -> - pr " VALUE rv = rb_str_new2 (r);\n"; + if f_len_exists name then ( + pr " size_t sz = hivex_%s_len (%s);\n" name (String.concat ", " c_params); + pr " VALUE rv = rb_str_new (r, sz);\n" + ) else + pr " VALUE rv = rb_str_new2 (r);\n"; pr " free (r);\n"; pr " return rv;\n" | RStringList -> -- 1.8.5.2
Richard W.M. Jones
2014-Jan-08 11:58 UTC
Re: [Libguestfs] hivex: Make node names and value names with embedded null characters accessible
On Wed, Jan 08, 2014 at 01:26:23AM +0100, Hilko Bengen wrote: > On Windows, there exist at least two APIs for dealing with the > Registry: The Win32 API (RegCreateKeyA, RegCreateKeyW, etc.) works > with null-terminated ASCII or UTF-16 strings. The native API > (ZwCreateKey, etc.), on the other hand works with UTF-16 strings that > are stored as buffers+length and may contain null characters. Malware > authors have been relying on the Win32 API's inability to properly > work with such names for several years. > > These changes make such names accessible from hivex. ACK to all 3 patches. It would be nice to have some sort of test coverage of these. Thanks, Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://people.redhat.com/~rjones/virt-df/
Hilko Bengen
2014-Jan-10 00:14 UTC
[Libguestfs] [PATCH] Add a minimal hive with "special" keys and values
--- images/README | 15 +++++++++++++++ images/mkzero/Makefile | 7 +++++++ images/mkzero/mkzero.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ images/special | Bin 0 -> 8192 bytes 4 files changed, 70 insertions(+) create mode 100644 images/mkzero/Makefile create mode 100644 images/mkzero/mkzero.c create mode 100644 images/special diff --git a/images/README b/images/README index 2131885..34c65f3 100644 --- a/images/README +++ b/images/README @@ -11,3 +11,18 @@ hand-crafted binary blob. tests. - Richard W.M. Jones 2010-02-24. + +'special' was created by importing 'minimal' into a VM running Windows +XP and loading it into HKEY_LOCAL_MACHINE\minimal using regedit.exe +(File/Load Hive...) + +- A subkey 'asdf_äöüß' was created in the root node + - An empty REG_STRING value 'asdf_äöüß' was created within that node. +- A subkey 'weird™' was created in the root node. + - An empty REG_STRING value 'symbols $£₤₧€' (SMALL DOLLAR SIGN, + FULLWIDTH POUND SIGN, PESETA SIGN, EURO SIGN) was created within + that node. +- A subkey 'zero\0key' with an REG_DWORD value 'zero\0val' + was created using the 'mkzero/mkzero.c'. (\0 = zero character) + +- Hilko Bengen 2014-01-10. 
diff --git a/images/mkzero/Makefile b/images/mkzero/Makefile new file mode 100644 index 0000000..4d33ade --- /dev/null +++ b/images/mkzero/Makefile @@ -0,0 +1,7 @@ +CROSS=i686-w64-mingw32- + +all: mkzero.exe +mkzero.exe: mkzero.o + $(CROSS)gcc -o $@ $< -lntdll +%.o: %.c + $(CROSS)gcc -o $@ -c $< diff --git a/images/mkzero/mkzero.c b/images/mkzero/mkzero.c new file mode 100644 index 0000000..1b50b22 --- /dev/null +++ b/images/mkzero/mkzero.c @@ -0,0 +1,48 @@ +/* use the NT native API to create registry key and value that contain + a zero character */ + +#include <ntdef.h> +#include <stdio.h> +#include <ddk/wdm.h> +#include <windef.h> + +int main (int argc, char **argv) +{ + NTSTATUS rc; + + UNICODE_STRING root_key_name; + RtlInitUnicodeString(&root_key_name, L"\\Registry\\Machine\\minimal"); + OBJECT_ATTRIBUTES root_key_obj; + InitializeObjectAttributes (&root_key_obj, &root_key_name, + OBJ_OPENIF | OBJ_CASE_INSENSITIVE, + NULL, NULL); + HANDLE minimal_key_handle; + rc = ZwCreateKey (&minimal_key_handle, KEY_ALL_ACCESS, &root_key_obj, + 0, NULL, REG_OPTION_NON_VOLATILE, NULL); + if (!NT_SUCCESS (rc)) { + printf("error: CreateKey <HKLM\\minimal>: 0x%08x\n", rc); + exit(1); + } + + UNICODE_STRING key_name = {16, 16, L"zero\0key"}; + OBJECT_ATTRIBUTES key_obj; + InitializeObjectAttributes (&key_obj, &key_name, + OBJ_OPENIF | OBJ_CASE_INSENSITIVE, + minimal_key_handle, NULL); + HANDLE key_handle; + rc = ZwCreateKey (&key_handle, KEY_ALL_ACCESS, &key_obj, + 0, NULL, REG_OPTION_NON_VOLATILE, NULL); + if (!NT_SUCCESS (rc)) { + printf("error: CreateKey: 0x%08x\n", rc); + exit(1); + } + + UNICODE_STRING value_name = {16, 16, L"zero\0val"}; + DWORD value = 0; + rc = ZwSetValueKey (key_handle, &value_name, 0, + REG_DWORD, &value, sizeof(value)); + if (!NT_SUCCESS (rc)) { + printf("error: SetValueKey: 0x%08x\n", rc); + exit(1); + } +} diff --git a/images/special b/images/special new file mode 100644 index 
0000000000000000000000000000000000000000..8aa4f2254af7b52f0a79061a7288c2128eee7b63 GIT binary patch literal 8192 zcmeHLziU%b6h28COhb#$Ul4=fYx@MLKL!^^6)M(2Dk4tBk~GnVmnM=(q~Z`n)IUHs zhkyk;I61kvcM)}{1Q$C94muRkP^1{Y@7_GxmnJ$170!YC?z!ijckape-TNS?oV$K) zEF!{jqOm&ic<4<8s>CoT)Lkl$)hjY3XCy0gl9wVzMQ%!2(s-6IPszCCVDruT2G&8` z9AXO0qq1!fXx39_w@ooxh$5f}C<2OrBA^H;0*Zhlpa>`eioicbU^bI4nho&(K9Haf zz;(UVY8CyX^5XN4gX|5^c6x_M9~H=AzX(EF)H&DM^IjS{WDsl5^DbVRygYe);`|l5 zmwvTc6<;J7wSyZRz#}64kJx;0%#I?=7nQG);7gK}{a8it4kYH{F-$#?#6xOZ>|NqV zd&C<%;!nxWP=0qK-r1br*ioH{x3$K0vx<a4z%k7f)M#uy^M+y#w4Hl|^N6ef>#83Y zz+dBbm`$(dcCK8KYPzrn|4Y=<FmVGw%f_)pj&x=wJN>D-{t;aqV!DdB6*xGaxzF8t zo0l4V94u-aKJapH5c6RSisfa^^5XR1CDntMF7XEcF7d{W_^!Oz_^rG+U3f`_u(rH7 zp3Q5UUa@`nU5h8*=k9#*hDN7n{O|m&;rA>@e~6Ft@U?kmJ59(>9P_z7$1d1=Ci3&h zu4c7;&s_Kd0X;?x>>!)>g@(ZILg5jm@Sfd5A1;rMTvkSgX(I!L*{Io*`#r=jSYEha zBJU3JaZr}9uKKCad7-@`^XTPe(B&&Ycg2&Zvf%Qh&>MUwTv>ne?$F)8)E`Hoo%OT6 zp#A~q8|RbR?Kv*Y_Q?e5yt@40bx{9XtY3ZeKG-PmhWd-X8~R?kffF;_Mu=krkIp*C oOB)Kqjk!;LZoXkZJexA9st70oihv@Z2q*%IfFhs>{C^1i23(Tu`2YX_ literal 0 HcmV?d00001 -- 1.8.5.2
Maybe Matching Threads
- hivex: Make node names and value names with embedded null characters accessible
- Re: [PATCH 1/7] Add a minimal hive with "special" keys and values
- Fwd: [PATCH hivex] non-ASCII characters in node names
- hivex: Test failure for Perl, OCaml, Python bindings on sparc
- Re: [PATCH 1/7] Add a minimal hive with "special" keys and values