Hilko Bengen
2013-Nov-24 22:25 UTC
[Libguestfs] [PATCH 1/3] lib: Further generalize iconv wrapper function.
---
lib/hivex-internal.h | 8 +++++---
lib/utf16.c | 11 +++++++----
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
index 4135f58..64fd49a 100644
--- a/lib/hivex-internal.h
+++ b/lib/hivex-internal.h
@@ -268,11 +268,13 @@ extern size_t * _hivex_return_offset_list (offset_list
*list);
extern void _hivex_print_offset_list (offset_list *list, FILE *fp);
/* utf16.c */
-extern char* _hivex_to_utf8 (/* const */ char *input, size_t len, char*
input_encoding);
+extern char* _hivex_recode (char *input_encoding,
+ const char *input, size_t input_len,
+ char *output_encoding, size_t *output_len);
#define _hivex_windows_utf16_to_utf8(_input, _len) \
- _hivex_to_utf8 (_input, _len, "UTF-16LE")
+ _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL)
#define _hivex_windows_latin1_to_utf8(_input, _len) \
- _hivex_to_utf8 (_input, _len, "LATIN1")
+ _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL)
extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t
len);
/* util.c */
diff --git a/lib/utf16.c b/lib/utf16.c
index eca2343..6b8bf9a 100644
--- a/lib/utf16.c
+++ b/lib/utf16.c
@@ -29,18 +29,19 @@
#include "hivex-internal.h"
char *
-_hivex_to_utf8 (/* const */ char *input, size_t len, char* input_encoding)
+_hivex_recode (char *input_encoding, const char *input, size_t input_len,
+ char *output_encoding, size_t *output_len)
{
- iconv_t ic = iconv_open ("UTF-8", input_encoding);
+ iconv_t ic = iconv_open (output_encoding, input_encoding);
if (ic == (iconv_t) -1)
return NULL;
/* iconv(3) has an insane interface ... */
- size_t outalloc = len;
+ size_t outalloc = input_len;
again:;
- size_t inlen = len;
+ size_t inlen = input_len;
size_t outlen = outalloc;
char *out = malloc (outlen + 1);
if (out == NULL) {
@@ -79,6 +80,8 @@ _hivex_to_utf8 (/* const */ char *input, size_t len, char*
input_encoding)
*outp = '\0';
iconv_close (ic);
+ if (output_len != NULL)
+ *output_len = outp - out;
return out;
}
--
1.8.4.4
Hilko Bengen
2013-Nov-24 22:25 UTC
[Libguestfs] [PATCH 2/3] lib: Add function that encodes UTF-8 to "packed" strings or UTF-16LE as Windows does
---
lib/hivex-internal.h | 1 +
lib/utf16.c | 19 +++++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
index 64fd49a..7a548c0 100644
--- a/lib/hivex-internal.h
+++ b/lib/hivex-internal.h
@@ -275,6 +275,7 @@ extern char* _hivex_recode (char *input_encoding,
_hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL)
#define _hivex_windows_latin1_to_utf8(_input, _len) \
_hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL)
+extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16);
extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t
len);
/* util.c */
diff --git a/lib/utf16.c b/lib/utf16.c
index 6b8bf9a..437613b 100644
--- a/lib/utf16.c
+++ b/lib/utf16.c
@@ -24,6 +24,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <iconv.h>
+#include <string.h>
#include "hivex.h"
#include "hivex-internal.h"
@@ -86,6 +87,24 @@ _hivex_recode (char *input_encoding, const char *input,
size_t input_len,
return out;
}
+/* Encode a given UTF-8 string to Latin1 (preferred) or UTF-16 for
+ * storing in the hive file, as needed.
+ */
+char*
+_hivex_encode_string(const char *str, size_t *size, int *utf16)
+{
+ char* outstr;
+ *utf16 = 0;
+ outstr = _hivex_recode ("UTF-8", str, strlen(str),
+ "LATIN1", size);
+ if (outstr != NULL)
+ return outstr;
+ *utf16 = 1;
+ outstr = _hivex_recode ("UTF-8", str, strlen(str),
+ "UTF-16LE", size);
+ return outstr;
+}
+
/* Get the length of a UTF-16 format string. Handle the string as
* pairs of bytes, looking for the first \0\0 pair. Only read up to
* 'len' maximum bytes.
--
1.8.4.4
Hilko Bengen
2013-Nov-24 22:25 UTC
[Libguestfs] [PATCH 3/3] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
---
lib/write.c | 49 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 34 insertions(+), 15 deletions(-)
diff --git a/lib/write.c b/lib/write.c
index dbb8292..72b1f8a 100644
--- a/lib/write.c
+++ b/lib/write.c
@@ -608,9 +608,17 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const
char *name)
return 0;
}
+ size_t recoded_name_len;
+ int use_utf16 = 0;
+ char* recoded_name = _hivex_encode_string (name, &recoded_name_len,
&use_utf16);
+ if (recoded_name == NULL) {
+ SET_ERRNO (EINVAL, "malformed name");
+ return 0;
+ }
+
/* Create the new nk-record. */
static const char nk_id[2] = { 'n', 'k' };
- size_t seg_len = sizeof (struct ntreg_nk_record) + strlen (name);
+ size_t seg_len = sizeof (struct ntreg_nk_record) + recoded_name_len;
hive_node_h nkoffset = allocate_block (h, seg_len, nk_id);
if (nkoffset == 0)
return 0;
@@ -619,14 +627,18 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const
char *name)
struct ntreg_nk_record *nk =
(struct ntreg_nk_record *) ((char *) h->addr + nkoffset);
- nk->flags = htole16 (0x0020); /* key is ASCII. */
+ if (use_utf16)
+ nk->flags = htole16 (0x0000);
+ else
+ nk->flags = htole16 (0x0020);
nk->parent = htole32 (parent - 0x1000);
nk->subkey_lf = htole32 (0xffffffff);
nk->subkey_lf_volatile = htole32 (0xffffffff);
nk->vallist = htole32 (0xffffffff);
nk->classname = htole32 (0xffffffff);
- nk->name_len = htole16 (strlen (name));
- strcpy (nk->name, name);
+ nk->name_len = htole16 (recoded_name_len);
+ memcpy (nk->name, recoded_name, recoded_name_len);
+ free(recoded_name);
/* Inherit parent sk. */
struct ntreg_nk_record *parent_nk =
@@ -719,9 +731,9 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
parent_nk->nr_subkeys = htole32 (nr_subkeys_in_parent_nk);
/* Update max_subkey_name_len in parent nk. */
- uint16_t max = le16toh (parent_nk->max_subkey_name_len);
- if (max < strlen (name) * 2) /* *2 because "recoded" in
UTF16-LE. */
- parent_nk->max_subkey_name_len = htole16 (strlen (name) * 2);
+ size_t utf16_len = use_utf16 ? recoded_name_len : recoded_name_len * 2;
+ if (le16toh (parent_nk->max_subkey_name_len) < utf16_len)
+ parent_nk->max_subkey_name_len = htole16 (utf16_len);
return nkoffset;
}
@@ -942,7 +954,12 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
for (i = 0; i < nr_values; ++i) {
/* Allocate vk record to store this (key, value) pair. */
static const char vk_id[2] = { 'v', 'k' };
- seg_len = sizeof (struct ntreg_vk_record) + strlen (values[i].key);
+ size_t name_len = strlen (values[i].key);
+ size_t recoded_name_len;
+ int use_utf16;
+ char* recoded_name = _hivex_encode_string (values[i].key,
&recoded_name_len,
+ &use_utf16);
+ seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len;
size_t vk_offs = allocate_block (h, seg_len, vk_id);
if (vk_offs == 0)
return -1;
@@ -957,15 +974,17 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
struct ntreg_vk_record *vk =
(struct ntreg_vk_record *) ((char *) h->addr + vk_offs);
- size_t name_len = strlen (values[i].key);
- vk->name_len = htole16 (name_len);
- strcpy (vk->name, values[i].key);
+ vk->name_len = htole16 (recoded_name_len);
+ memcpy (vk->name, recoded_name, recoded_name_len);
vk->data_type = htole32 (values[i].t);
uint32_t len = values[i].len;
if (len <= 4) /* store it inline => set MSB flag */
len |= 0x80000000;
vk->data_len = htole32 (len);
- vk->flags = name_len == 0 ? 0 : 1;
+ if (recoded_name_len == 0)
+ vk->flags = 0;
+ else
+ vk->flags = htole16 (!use_utf16);
if (values[i].len <= 4) /* store it inline */
memcpy (&vk->data_offset, values[i].value, values[i].len);
@@ -985,9 +1004,9 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
vk->data_offset = htole32 (offs - 0x1000);
}
- if (name_len * 2 > le32toh (nk->max_vk_name_len))
- /* * 2 for UTF16-LE "reencoding" */
- nk->max_vk_name_len = htole32 (name_len * 2);
+ size_t utf16_len = use_utf16 ? recoded_name_len : recoded_name_len * 2;
+ if (utf16_len > le32toh (nk->max_vk_name_len))
+ nk->max_vk_name_len = htole32 (utf16_len);
if (values[i].len > le32toh (nk->max_vk_data_len))
nk->max_vk_data_len = htole32 (values[i].len);
}
--
1.8.4.4
Richard W.M. Jones
2013-Nov-25 08:44 UTC
Re: [Libguestfs] [PATCH 1/3] lib: Further generalize iconv wrapper function.
On Sun, Nov 24, 2013 at 11:25:51PM +0100, Hilko Bengen wrote:> --- > lib/hivex-internal.h | 8 +++++--- > lib/utf16.c | 11 +++++++---- > 2 files changed, 12 insertions(+), 7 deletions(-) > > diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h > index 4135f58..64fd49a 100644 > --- a/lib/hivex-internal.h > +++ b/lib/hivex-internal.h > @@ -268,11 +268,13 @@ extern size_t * _hivex_return_offset_list (offset_list *list); > extern void _hivex_print_offset_list (offset_list *list, FILE *fp); > > /* utf16.c */ > -extern char* _hivex_to_utf8 (/* const */ char *input, size_t len, char* input_encoding); > +extern char* _hivex_recode (char *input_encoding, > + const char *input, size_t input_len, > + char *output_encoding, size_t *output_len); > #define _hivex_windows_utf16_to_utf8(_input, _len) \ > - _hivex_to_utf8 (_input, _len, "UTF-16LE") > + _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL) > #define _hivex_windows_latin1_to_utf8(_input, _len) \ > - _hivex_to_utf8 (_input, _len, "LATIN1") > + _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) > extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); > > /* util.c */ > diff --git a/lib/utf16.c b/lib/utf16.c > index eca2343..6b8bf9a 100644 > --- a/lib/utf16.c > +++ b/lib/utf16.c > @@ -29,18 +29,19 @@ > #include "hivex-internal.h" > > char * > -_hivex_to_utf8 (/* const */ char *input, size_t len, char* input_encoding) > +_hivex_recode (char *input_encoding, const char *input, size_t input_len, > + char *output_encoding, size_t *output_len) > { > - iconv_t ic = iconv_open ("UTF-8", input_encoding); > + iconv_t ic = iconv_open (output_encoding, input_encoding); > if (ic == (iconv_t) -1) > return NULL; > > /* iconv(3) has an insane interface ... 
*/ > > - size_t outalloc = len; > + size_t outalloc = input_len; > > again:; > - size_t inlen = len; > + size_t inlen = input_len; > size_t outlen = outalloc; > char *out = malloc (outlen + 1); > if (out == NULL) { > @@ -79,6 +80,8 @@ _hivex_to_utf8 (/* const */ char *input, size_t len, char* input_encoding) > > *outp = '\0'; > iconv_close (ic); > + if (output_len != NULL) > + *output_len = outp - out; > > return out; > } > -- > 1.8.4.4
ACK.
Rich.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 100 libraries supported. http://fedoraproject.org/wiki/MinGW
Richard W.M. Jones
2013-Nov-25 09:06 UTC
Re: [Libguestfs] [PATCH 3/3] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
On Sun, Nov 24, 2013 at 11:25:53PM +0100, Hilko Bengen wrote:
> --- > lib/write.c | 49 ++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 34 insertions(+), 15 deletions(-) > > diff --git a/lib/write.c b/lib/write.c > index dbb8292..72b1f8a 100644 > --- a/lib/write.c > +++ b/lib/write.c > @@ -608,9 +608,17 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) > return 0; > } > > + size_t recoded_name_len; > + int use_utf16 = 0; > + char* recoded_name = _hivex_encode_string (name, &recoded_name_len, &use_utf16); > + if (recoded_name == NULL) { > + SET_ERRNO (EINVAL, "malformed name"); > + return 0; > + } > + > /* Create the new nk-record. */ > static const char nk_id[2] = { 'n', 'k' }; > - size_t seg_len = sizeof (struct ntreg_nk_record) + strlen (name); > + size_t seg_len = sizeof (struct ntreg_nk_record) + recoded_name_len; > hive_node_h nkoffset = allocate_block (h, seg_len, nk_id); > if (nkoffset == 0) > return 0; > @@ -619,14 +627,18 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) > > struct ntreg_nk_record *nk > (struct ntreg_nk_record *) ((char *) h->addr + nkoffset); > - nk->flags = htole16 (0x0020); /* key is ASCII. */ > + if (use_utf16) > + nk->flags = htole16 (0x0000); > + else > + nk->flags = htole16 (0x0020); > nk->parent = htole32 (parent - 0x1000); > nk->subkey_lf = htole32 (0xffffffff); > nk->subkey_lf_volatile = htole32 (0xffffffff); > nk->vallist = htole32 (0xffffffff); > nk->classname = htole32 (0xffffffff); > - nk->name_len = htole16 (strlen (name)); > - strcpy (nk->name, name); > + nk->name_len = htole16 (recoded_name_len); > + memcpy (nk->name, recoded_name, recoded_name_len); > + free(recoded_name);
Please put spaces after function names! It improves readability:
http://www1.psych.purdue.edu/~zpizlo/rsteinma/Bob-FOR%20CV/Epelboim%20et%20al%201997%20Fillers%20in%20Reading.pdf
> /* Inherit parent sk. 
*/ > struct ntreg_nk_record *parent_nk > @@ -719,9 +731,9 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) > parent_nk->nr_subkeys = htole32 (nr_subkeys_in_parent_nk); > > /* Update max_subkey_name_len in parent nk. */ > - uint16_t max = le16toh (parent_nk->max_subkey_name_len); > - if (max < strlen (name) * 2) /* *2 because "recoded" in UTF16-LE. */ > - parent_nk->max_subkey_name_len = htole16 (strlen (name) * 2); > + size_t utf16_len = use_utf16 ? recoded_name_len : recoded_name_len * 2;
* 2 is probably wrong here for non-BMP characters, but the original code makes the same mistake ... Could we get the true length from the hivex_encode_string function?
> + if (le16toh (parent_nk->max_subkey_name_len) < utf16_len) > + parent_nk->max_subkey_name_len = htole16 (utf16_len); > return nkoffset; > } > @@ -942,7 +954,12 @@ hivex_node_set_values (hive_h *h, hive_node_h node, > for (i = 0; i < nr_values; ++i) { > /* Allocate vk record to store this (key, value) pair. */ > static const char vk_id[2] = { 'v', 'k' }; > - seg_len = sizeof (struct ntreg_vk_record) + strlen (values[i].key); > + size_t name_len = strlen (values[i].key); > + size_t recoded_name_len; > + int use_utf16; > + char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len, > + &use_utf16); > + seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len; > size_t vk_offs = allocate_block (h, seg_len, vk_id); > if (vk_offs == 0) > return -1; > @@ -957,15 +974,17 @@ hivex_node_set_values (hive_h *h, hive_node_h node, > > struct ntreg_vk_record *vk > (struct ntreg_vk_record *) ((char *) h->addr + vk_offs); > - size_t name_len = strlen (values[i].key); > - vk->name_len = htole16 (name_len); > - strcpy (vk->name, values[i].key); > + vk->name_len = htole16 (recoded_name_len); > + memcpy (vk->name, recoded_name, recoded_name_len); > vk->data_type = htole32 (values[i].t); > uint32_t len = values[i].len; > if (len <= 4) /* store it inline => set MSB flag */ > len |= 
0x80000000; > vk->data_len = htole32 (len); > - vk->flags = name_len == 0 ? 0 : 1; > + if (recoded_name_len == 0) > + vk->flags = 0; > + else > + vk->flags = htole16 (!use_utf16); > if (values[i].len <= 4) /* store it inline */ > memcpy (&vk->data_offset, values[i].value, values[i].len); > @@ -985,9 +1004,9 @@ hivex_node_set_values (hive_h *h, hive_node_h node, > vk->data_offset = htole32 (offs - 0x1000); > } > > - if (name_len * 2 > le32toh (nk->max_vk_name_len)) > - /* * 2 for UTF16-LE "reencoding" */ > - nk->max_vk_name_len = htole32 (name_len * 2); > + size_t utf16_len = use_utf16 ? recoded_name_len : recoded_name_len * 2;
* 2 - see above.
> + if (utf16_len > le32toh (nk->max_vk_name_len)) > + nk->max_vk_name_len = htole32 (utf16_len); > if (values[i].len > le32toh (nk->max_vk_data_len)) > nk->max_vk_data_len = htole32 (values[i].len); > } > -- > 1.8.4.4
2/3 & 3/3 are generally good, so ACK. Any comment on the * 2 issue above?
Rich.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming blog: http://rwmj.wordpress.com Fedora now supports 80 OCaml packages (the OPEN alternative to F#)
Reasonably Related Threads
- [PATCH 3/3, take 2] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
- Re: [PATCH 3/3] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
- Re: [PATCH 3/3] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
- [PATCH 1/3] lib: Further generalize iconv wrapper function.
- Re: [PATCH] Add a cache for iconv_t handles to hive_t