Hilko Bengen
2018-Feb-09 00:52 UTC
[Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t
It was brought to my attention that dumping a registry hive causes a lot of time spent in disk I/O activity because iconv_open() and iconv_close() are called for every key. Every iconv_open() call causes /usr/lib/.../gconv/$ENCODING.so to be opened and mapped. The iconv_t handles are now cached in the hive_h struct; they are opened on-demand and re-used. On my ~10 year old Lenovo T60, I have seen 57% savings in the overal runtime of running hivexregedit --export windows-8-enterprise-software.hive '\\' --- lib/handle.c | 43 ++++++++++++++++++++++++++++++++++++++++++- lib/hivex-internal.h | 31 ++++++++++++++++++++++--------- lib/node.c | 6 +++--- lib/utf16.c | 38 ++++++++++++++++---------------------- lib/value.c | 10 +++++----- lib/write.c | 4 ++-- 6 files changed, 90 insertions(+), 42 deletions(-) diff --git a/lib/handle.c b/lib/handle.c index 9dcf81d..7d715df 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -31,6 +31,9 @@ #include <errno.h> #include <assert.h> +#include <iconv.h> +#include <pthread.h> + #ifdef HAVE_MMAP #include <sys/mman.h> #else @@ -62,6 +65,32 @@ header_checksum (const hive_h *h) #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) +iconv_t * +_hivex_get_iconv (hive_h *h, recode_type t) +{ + pthread_mutex_lock (&h->iconv_cache[t].mutex); + if (h->iconv_cache[t].handle == NULL) { + if (t == utf8_to_latin1) + h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8"); + else if (t == latin1_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1"); + else if (t == utf8_to_utf16le) + h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8"); + else if (t == utf16le_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE"); + } else { + /* reinitialize iconv context */ + iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0); + } + return h->iconv_cache[t].handle; +} + +void +_hivex_release_iconv (hive_h *h, recode_type t) +{ + pthread_mutex_unlock (&h->iconv_cache[t].mutex); +} + hive_h * hivex_open (const char *filename, int flags) { @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags) goto error; } + for (int t=0; t<3; t++) { + pthread_mutex_init (&h->iconv_cache[t].mutex, NULL); + h->iconv_cache[t].handle = NULL; + } + /* Last modified time. */ h->last_modified = le64toh ((int64_t) h->hdr->last_modified); if (h->msglvl >= 2) { - char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64); + char *name = _hivex_recode (h, utf16le_to_utf8, + h->hdr->name, 64, NULL); fprintf (stderr, "hivex_open: header fields:\n" @@ -424,6 +459,12 @@ hivex_close (hive_h *h) else r = 0; free (h->filename); + for (int t=0; t<3; t++) { + if (h->iconv_cache[t].handle != NULL) { + iconv_close (h->iconv_cache[t].handle); + h->iconv_cache[t].handle = NULL; + } + } free (h); return r; diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h index 9a497ed..fefdc81 100644 --- a/lib/hivex-internal.h +++ b/lib/hivex-internal.h @@ -23,6 +23,8 @@ #include <stddef.h> #include <string.h> +#include <iconv.h> + #include "byte_conversions.h" #define STREQ(a,b) (strcmp((a),(b)) == 0) @@ -35,6 +37,13 @@ #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0) #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0) +typedef enum { + utf8_to_latin1 = 0, + latin1_to_utf8, + utf8_to_utf16le, + utf16le_to_utf8, +} recode_type; + struct hive_h { char *filename; int fd; @@ -79,6 +88,11 @@ struct hive_h { /* Internal data for mmap replacement */ void *p_winmap; #endif + + struct { + pthread_mutex_t mutex; + iconv_t *handle; + } iconv_cache[4]; }; /* Format of registry blocks. NB. All fields are little endian. */ @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list); extern size_t * _hivex_return_offset_list (offset_list *list); extern void _hivex_print_offset_list (offset_list *list, FILE *fp); +/* handle.c */ +extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r); +extern void _hivex_release_iconv (hive_h *h, recode_type r); + /* utf16.c */ -extern char * _hivex_recode (const char *input_encoding, - const char *input, size_t input_len, - const char *output_encoding, size_t *output_len); -#define _hivex_windows_utf16_to_utf8(_input, _len) \ - _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL) -#define _hivex_windows_latin1_to_utf8(_input, _len) \ - _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) -extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16); +extern char * _hivex_recode (hive_h *h, recode_type r, + const char *input, size_t input_len, size_t *output_len); +extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16); extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); -extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16); +extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16); /* util.c */ extern void _hivex_free_strings (char **argv); diff --git a/lib/node.c b/lib/node.c index 36e61c4..21cd127 100644 --- a/lib/node.c +++ b/lib/node.c @@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node) } size_t flags = le16toh (nk->flags); if (flags & 0x20) { - return _hivex_windows_latin1_to_utf8 (nk->name, len); + return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (nk->name, len); + return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL); } } @@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node) return 0; } - return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20)); + return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20)); } diff --git a/lib/utf16.c b/lib/utf16.c index 238f40a..c0f0b05 100644 --- a/lib/utf16.c +++ b/lib/utf16.c @@ -30,24 +30,21 @@ #include "hivex-internal.h" char * -_hivex_recode (const char *input_encoding, const char *input, size_t input_len, - const char *output_encoding, size_t *output_len) +_hivex_recode (hive_h *h, recode_type t, + const char *input, size_t input_len, size_t *output_len) { - iconv_t ic = iconv_open (output_encoding, input_encoding); - if (ic == (iconv_t) -1) - return NULL; - /* iconv(3) has an insane interface ... */ size_t outalloc = input_len; + iconv_t *ic = _hivex_get_iconv (h, t); again:; size_t inlen = input_len; size_t outlen = outalloc; char *out = malloc (outlen + 1); if (out == NULL) { int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen); if (r == (size_t) -1) { + int err = errno; if (errno == E2BIG) { - int err = errno; /* Reset errno here because we don't want to accidentally * return E2BIG to a library caller. */ - errno = 0; size_t prev = outalloc; /* Try again with a larger output buffer. */ free (out); outalloc *= 2; if (outalloc < prev) { - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, } else { /* Else some conversion failure, eg. EILSEQ, EINVAL. */ - int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); free (out); errno = err; return NULL; } } + _hivex_release_iconv (h, t); *outp = '\0'; - iconv_close (ic); if (output_len != NULL) *output_len = outp - out; - return out; } @@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, * storing in the hive file, as needed. */ char* -_hivex_encode_string(const char *str, size_t *size, int *utf16) +_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16) { char* outstr; *utf16 = 0; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "LATIN1", size); + outstr = _hivex_recode (h, utf8_to_latin1, + str, strlen(str), size); if (outstr != NULL) return outstr; *utf16 = 1; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "UTF-16LE", size); + outstr = _hivex_recode (h, utf8_to_utf16le, + str, strlen(str), size); return outstr; } @@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len) } size_t -_hivex_utf8_strlen (const char* str, size_t len, int utf16) +_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16) { - const char *encoding = utf16 ? "UTF-16LE" : "LATIN1"; + recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8; size_t ret = 0; - char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret); + char *buf = _hivex_recode (h, t, str, len, &ret); free(buf); return ret; } diff --git a/lib/value.c b/lib/value.c index 2dfe006..3257b53 100644 --- a/lib/value.c +++ b/lib/value.c @@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value) SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len); return 0; } - return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01)); + return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01)); } char * @@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value) return NULL; } if (flags & 0x01) { - return _hivex_windows_latin1_to_utf8 (vk->name, len); + return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (vk->name, len); + return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL); } } @@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value) if (slen < len) len = slen; - char *ret = _hivex_windows_utf16_to_utf8 (data, len); + char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL); free (data); if (ret == NULL) return NULL; @@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value) } ret = ret2; - ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen); + ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL); ret[nr_strings] = NULL; if (ret[nr_strings-1] == NULL) { _hivex_free_strings (ret); diff --git a/lib/write.c b/lib/write.c index 33b64e4..70105c9 100644 --- a/lib/write.c +++ b/lib/write.c @@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) size_t recoded_name_len; int use_utf16 = 0; char *recoded_name - _hivex_encode_string (name, &recoded_name_len, &use_utf16); + _hivex_encode_string (h, name, &recoded_name_len, &use_utf16); if (recoded_name == NULL) { SET_ERRNO (EINVAL, "malformed name"); return 0; @@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node, static const char vk_id[2] = { 'v', 'k' }; size_t recoded_name_len; int use_utf16; - char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len, + char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len, &use_utf16); seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len; size_t vk_offs = allocate_block (h, seg_len, vk_id); -- 2.11.0
Richard W.M. Jones
2018-Feb-09 13:30 UTC
Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t
On Fri, Feb 09, 2018 at 01:52:52AM +0100, Hilko Bengen wrote:> It was brought to my attention that dumping a registry hive causes a > lot of time spent in disk I/O activity because iconv_open() and > iconv_close() are called for every key. Every iconv_open() call causes > /usr/lib/.../gconv/$ENCODING.so to be opened and mapped. > > The iconv_t handles are now cached in the hive_h struct; they are > opened on-demand and re-used. > > On my ~10 year old Lenovo T60, I have seen 57% savings in the overal > runtime of running > > hivexregedit --export windows-8-enterprise-software.hive '\\' > --- > lib/handle.c | 43 ++++++++++++++++++++++++++++++++++++++++++- > lib/hivex-internal.h | 31 ++++++++++++++++++++++--------- > lib/node.c | 6 +++--- > lib/utf16.c | 38 ++++++++++++++++---------------------- > lib/value.c | 10 +++++----- > lib/write.c | 4 ++-- > 6 files changed, 90 insertions(+), 42 deletions(-) > > diff --git a/lib/handle.c b/lib/handle.c > index 9dcf81d..7d715df 100644 > --- a/lib/handle.c > +++ b/lib/handle.c > @@ -31,6 +31,9 @@ > #include <errno.h> > #include <assert.h> > > +#include <iconv.h> > +#include <pthread.h>This makes hivex depend unconditionally on pthread. It's possible to make this optional so that hivex would use locking when the main program is already linked to pthread but would skip locking otherwise, and it's a fairly simply change: (1) Edit ‘bootstrap’ and add ‘threadlib’ to the list of modules near the end. You'll probably need to rerun ./bootstrap after this. (2) Read ‘.gnulib/modules/threadlib’ and follow the instructions for modifying configure.ac and Makefile.am. (3) Replace #include <pthread.h> -> #include "glthread/lock.h". (4) Replace any calls to pthread_mutex_* with gl_lock_*. It's probably not necessary to check return codes, unless they need to be recursive in which case use gl_recursive_lock_* instead.> #ifdef HAVE_MMAP > #include <sys/mman.h> > #else > @@ -62,6 +65,32 @@ header_checksum (const hive_h *h) > > #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) > > +iconv_t * > +_hivex_get_iconv (hive_h *h, recode_type t) > +{ > + pthread_mutex_lock (&h->iconv_cache[t].mutex); > + if (h->iconv_cache[t].handle == NULL) { > + if (t == utf8_to_latin1) > + h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8"); > + else if (t == latin1_to_utf8) > + h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1"); > + else if (t == utf8_to_utf16le) > + h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8"); > + else if (t == utf16le_to_utf8) > + h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE"); > + } else { > + /* reinitialize iconv context */ > + iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0); > + } > + return h->iconv_cache[t].handle; > +} > + > +void > +_hivex_release_iconv (hive_h *h, recode_type t) > +{ > + pthread_mutex_unlock (&h->iconv_cache[t].mutex); > +} > + > hive_h * > hivex_open (const char *filename, int flags) > { > @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags) > goto error; > } > > + for (int t=0; t<3; t++) { > + pthread_mutex_init (&h->iconv_cache[t].mutex, NULL); > + h->iconv_cache[t].handle = NULL; > + } > + > /* Last modified time. */ > h->last_modified = le64toh ((int64_t) h->hdr->last_modified); > > if (h->msglvl >= 2) { > - char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64); > + char *name = _hivex_recode (h, utf16le_to_utf8, > + h->hdr->name, 64, NULL); > > fprintf (stderr, > "hivex_open: header fields:\n" > @@ -424,6 +459,12 @@ hivex_close (hive_h *h) > else > r = 0; > free (h->filename); > + for (int t=0; t<3; t++) { > + if (h->iconv_cache[t].handle != NULL) { > + iconv_close (h->iconv_cache[t].handle); > + h->iconv_cache[t].handle = NULL; > + } > + } > free (h); > > return r; > diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h > index 9a497ed..fefdc81 100644 > --- a/lib/hivex-internal.h > +++ b/lib/hivex-internal.h > @@ -23,6 +23,8 @@ > #include <stddef.h> > #include <string.h> > > +#include <iconv.h> > + > #include "byte_conversions.h" > > #define STREQ(a,b) (strcmp((a),(b)) == 0) > @@ -35,6 +37,13 @@ > #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0) > #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0) > > +typedef enum { > + utf8_to_latin1 = 0, > + latin1_to_utf8, > + utf8_to_utf16le, > + utf16le_to_utf8, > +} recode_type; > + > struct hive_h { > char *filename; > int fd; > @@ -79,6 +88,11 @@ struct hive_h { > /* Internal data for mmap replacement */ > void *p_winmap; > #endif > + > + struct { > + pthread_mutex_t mutex; > + iconv_t *handle; > + } iconv_cache[4]; > }; > > /* Format of registry blocks. NB. All fields are little endian. */ > @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list); > extern size_t * _hivex_return_offset_list (offset_list *list); > extern void _hivex_print_offset_list (offset_list *list, FILE *fp); > > +/* handle.c */ > +extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r); > +extern void _hivex_release_iconv (hive_h *h, recode_type r); > + > /* utf16.c */ > -extern char * _hivex_recode (const char *input_encoding, > - const char *input, size_t input_len, > - const char *output_encoding, size_t *output_len); > -#define _hivex_windows_utf16_to_utf8(_input, _len) \ > - _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL) > -#define _hivex_windows_latin1_to_utf8(_input, _len) \ > - _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) > -extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16); > +extern char * _hivex_recode (hive_h *h, recode_type r, > + const char *input, size_t input_len, size_t *output_len); > +extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16); > extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); > -extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16); > +extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16); > > /* util.c */ > extern void _hivex_free_strings (char **argv); > diff --git a/lib/node.c b/lib/node.c > index 36e61c4..21cd127 100644 > --- a/lib/node.c > +++ b/lib/node.c > @@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node) > } > size_t flags = le16toh (nk->flags); > if (flags & 0x20) { > - return _hivex_windows_latin1_to_utf8 (nk->name, len); > + return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL); > } else { > - return _hivex_windows_utf16_to_utf8 (nk->name, len); > + return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL); > } > } > > @@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node) > return 0; > } > > - return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20)); > + return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20)); > } > > > diff --git a/lib/utf16.c b/lib/utf16.c > index 238f40a..c0f0b05 100644 > --- a/lib/utf16.c > +++ b/lib/utf16.c > @@ -30,24 +30,21 @@ > #include "hivex-internal.h" > > char * > -_hivex_recode (const char *input_encoding, const char *input, size_t input_len, > - const char *output_encoding, size_t *output_len) > +_hivex_recode (hive_h *h, recode_type t, > + const char *input, size_t input_len, size_t *output_len) > { > - iconv_t ic = iconv_open (output_encoding, input_encoding); > - if (ic == (iconv_t) -1) > - return NULL; > - > /* iconv(3) has an insane interface ... */ > > size_t outalloc = input_len; > > + iconv_t *ic = _hivex_get_iconv (h, t); > again:; > size_t inlen = input_len; > size_t outlen = outalloc; > char *out = malloc (outlen + 1); > if (out == NULL) { > int err = errno; > - iconv_close (ic); > + _hivex_release_iconv (h, t); > errno = err; > return NULL; > } > @@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, > > size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen); > if (r == (size_t) -1) { > + int err = errno; > if (errno == E2BIG) { > - int err = errno; > /* Reset errno here because we don't want to accidentally > * return E2BIG to a library caller. > */ > - errno = 0; > size_t prev = outalloc; > /* Try again with a larger output buffer. */ > free (out); > outalloc *= 2; > if (outalloc < prev) { > - iconv_close (ic); > + _hivex_release_iconv (h, t); > errno = err; > return NULL; > } > @@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, > } > else { > /* Else some conversion failure, eg. EILSEQ, EINVAL. */ > - int err = errno; > - iconv_close (ic); > + _hivex_release_iconv (h, t); > free (out); > errno = err; > return NULL; > } > } > > + _hivex_release_iconv (h, t); > *outp = '\0'; > - iconv_close (ic); > if (output_len != NULL) > *output_len = outp - out; > - > return out; > } > > @@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, > * storing in the hive file, as needed. > */ > char* > -_hivex_encode_string(const char *str, size_t *size, int *utf16) > +_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16) > { > char* outstr; > *utf16 = 0; > - outstr = _hivex_recode ("UTF-8", str, strlen(str), > - "LATIN1", size); > + outstr = _hivex_recode (h, utf8_to_latin1, > + str, strlen(str), size); > if (outstr != NULL) > return outstr; > *utf16 = 1; > - outstr = _hivex_recode ("UTF-8", str, strlen(str), > - "UTF-16LE", size); > + outstr = _hivex_recode (h, utf8_to_utf16le, > + str, strlen(str), size); > return outstr; > } > > @@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len) > } > > size_t > -_hivex_utf8_strlen (const char* str, size_t len, int utf16) > +_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16) > { > - const char *encoding = utf16 ? "UTF-16LE" : "LATIN1"; > + recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8; > size_t ret = 0; > - char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret); > + char *buf = _hivex_recode (h, t, str, len, &ret); > free(buf); > return ret; > } > diff --git a/lib/value.c b/lib/value.c > index 2dfe006..3257b53 100644 > --- a/lib/value.c > +++ b/lib/value.c > @@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value) > SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len); > return 0; > } > - return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01)); > + return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01)); > } > > char * > @@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value) > return NULL; > } > if (flags & 0x01) { > - return _hivex_windows_latin1_to_utf8 (vk->name, len); > + return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL); > } else { > - return _hivex_windows_utf16_to_utf8 (vk->name, len); > + return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL); > } > } > > @@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value) > if (slen < len) > len = slen; > > - char *ret = _hivex_windows_utf16_to_utf8 (data, len); > + char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL); > free (data); > if (ret == NULL) > return NULL; > @@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value) > } > ret = ret2; > > - ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen); > + ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL); > ret[nr_strings] = NULL; > if (ret[nr_strings-1] == NULL) { > _hivex_free_strings (ret); > diff --git a/lib/write.c b/lib/write.c > index 33b64e4..70105c9 100644 > --- a/lib/write.c > +++ b/lib/write.c > @@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) > size_t recoded_name_len; > int use_utf16 = 0; > char *recoded_name > - _hivex_encode_string (name, &recoded_name_len, &use_utf16); > + _hivex_encode_string (h, name, &recoded_name_len, &use_utf16); > if (recoded_name == NULL) { > SET_ERRNO (EINVAL, "malformed name"); > return 0; > @@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node, > static const char vk_id[2] = { 'v', 'k' }; > size_t recoded_name_len; > int use_utf16; > - char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len, > + char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len, > &use_utf16); > seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len; > size_t vk_offs = allocate_block (h, seg_len, vk_id);The rest looks OK. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into KVM guests. http://libguestfs.org/virt-v2v
Richard W.M. Jones
2018-Feb-09 13:34 UTC
Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t
On Fri, Feb 09, 2018 at 01:30:03PM +0000, Richard W.M. Jones wrote:> (4) Replace any calls to pthread_mutex_* with gl_lock_*. It's > probably not necessary to check return codes, unless they need to be > recursive in which case use gl_recursive_lock_* instead.Two sentences jammed into one there, it should say: (4) Replace any calls to pthread_mutex_* with gl_lock_*, unless they need to be recursive in which case use gl_recursive_lock_* instead. It's probably not necessary to check return codes, Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Hilko Bengen
2018-Feb-09 14:52 UTC
[Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t
It was brought to my attention that dumping a registry hive causes a lot of time spent in disk I/O activity because iconv_open() and iconv_close() are called for every key. Every iconv_open() call causes /usr/lib/.../gconv/$ENCODING.so to be opened and mapped. The iconv_t handles are now cached in the hive_h struct; they are opened on-demand and re-used. On my ~10 year old Lenovo T60, I have seen 57% savings in the overal runtime of running hivexregedit --export windows-8-enterprise-software.hive '\\' --- bootstrap | 1 + configure.ac | 2 ++ lib/Makefile.am | 2 ++ lib/handle.c | 42 +++++++++++++++++++++++++++++++++++++++++- lib/hivex-internal.h | 31 ++++++++++++++++++++++--------- lib/node.c | 6 +++--- lib/utf16.c | 38 ++++++++++++++++---------------------- lib/value.c | 10 +++++----- lib/write.c | 4 ++-- m4/.gitignore | 2 ++ 10 files changed, 96 insertions(+), 42 deletions(-) diff --git a/bootstrap b/bootstrap index bd82477..373fad8 100755 --- a/bootstrap +++ b/bootstrap @@ -75,6 +75,7 @@ vc-list-files warnings xstrtol xstrtoll +threadlib ' $gnulib_tool \ diff --git a/configure.ac b/configure.ac index 547fb0d..8405774 100644 --- a/configure.ac +++ b/configure.ac @@ -38,7 +38,9 @@ AC_DEFINE([PACKAGE_VERSION_RELEASE],[hivex_release],[Release number]) AC_DEFINE([PACKAGE_VERSION_EXTRA],["hivex_extra"],[Extra version string]) gl_EARLY +gl_THREADLIB_EARLY gl_INIT +gl_THREADLIB AM_PROG_LIBTOOL diff --git a/lib/Makefile.am b/lib/Makefile.am index 4a7cea1..62cdf35 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -38,6 +38,8 @@ libhivex_la_SOURCES = \ visit.c \ write.c +libhivex_la_SOURCES += $(top_srcdir)/gnulib/lib/glthread/threadlib.c + libhivex_la_LIBADD = ../gnulib/lib/libgnu.la $(LTLIBOBJS) libhivex_la_LDFLAGS = \ -version-info 0:0:0 \ diff --git a/lib/handle.c b/lib/handle.c index 9dcf81d..01b8d80 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -30,6 +30,8 @@ #include <sys/stat.h> #include <errno.h> #include <assert.h> +#include <iconv.h> +#include <glthread/lock.h> #ifdef HAVE_MMAP #include <sys/mman.h> @@ -62,6 +64,32 @@ header_checksum (const hive_h *h) #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) +iconv_t * +_hivex_get_iconv (hive_h *h, recode_type t) +{ + glthread_lock_lock (&h->iconv_cache[t].mutex); + if (h->iconv_cache[t].handle == NULL) { + if (t == utf8_to_latin1) + h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8"); + else if (t == latin1_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1"); + else if (t == utf8_to_utf16le) + h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8"); + else if (t == utf16le_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE"); + } else { + /* reinitialize iconv context */ + iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0); + } + return h->iconv_cache[t].handle; +} + +void +_hivex_release_iconv (hive_h *h, recode_type t) +{ + glthread_lock_unlock (&h->iconv_cache[t].mutex); +} + hive_h * hivex_open (const char *filename, int flags) { @@ -164,11 +192,17 @@ hivex_open (const char *filename, int flags) goto error; } + for (int t=0; t<3; t++) { + glthread_lock_init (&h->iconv_cache[t].mutex); + h->iconv_cache[t].handle = NULL; + } + /* Last modified time. */ h->last_modified = le64toh ((int64_t) h->hdr->last_modified); if (h->msglvl >= 2) { - char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64); + char *name = _hivex_recode (h, utf16le_to_utf8, + h->hdr->name, 64, NULL); fprintf (stderr, "hivex_open: header fields:\n" @@ -424,6 +458,12 @@ hivex_close (hive_h *h) else r = 0; free (h->filename); + for (int t=0; t<3; t++) { + if (h->iconv_cache[t].handle != NULL) { + iconv_close (h->iconv_cache[t].handle); + h->iconv_cache[t].handle = NULL; + } + } free (h); return r; diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h index 9a497ed..d04ae3c 100644 --- a/lib/hivex-internal.h +++ b/lib/hivex-internal.h @@ -22,6 +22,8 @@ #include <stdarg.h> #include <stddef.h> #include <string.h> +#include <iconv.h> +#include <glthread/lock.h> #include "byte_conversions.h" @@ -35,6 +37,13 @@ #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0) #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0) +typedef enum { + utf8_to_latin1 = 0, + latin1_to_utf8, + utf8_to_utf16le, + utf16le_to_utf8, +} recode_type; + struct hive_h { char *filename; int fd; @@ -79,6 +88,11 @@ struct hive_h { /* Internal data for mmap replacement */ void *p_winmap; #endif + + struct { + gl_lock_t mutex; + iconv_t *handle; + } iconv_cache[4]; }; /* Format of registry blocks. NB. All fields are little endian. */ @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list); extern size_t * _hivex_return_offset_list (offset_list *list); extern void _hivex_print_offset_list (offset_list *list, FILE *fp); +/* handle.c */ +extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r); +extern void _hivex_release_iconv (hive_h *h, recode_type r); + /* utf16.c */ -extern char * _hivex_recode (const char *input_encoding, - const char *input, size_t input_len, - const char *output_encoding, size_t *output_len); -#define _hivex_windows_utf16_to_utf8(_input, _len) \ - _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL) -#define _hivex_windows_latin1_to_utf8(_input, _len) \ - _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) -extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16); +extern char * _hivex_recode (hive_h *h, recode_type r, + const char *input, size_t input_len, size_t *output_len); +extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16); extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); -extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16); +extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16); /* util.c */ extern void _hivex_free_strings (char **argv); diff --git a/lib/node.c b/lib/node.c index 36e61c4..21cd127 100644 --- a/lib/node.c +++ b/lib/node.c @@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node) } size_t flags = le16toh (nk->flags); if (flags & 0x20) { - return _hivex_windows_latin1_to_utf8 (nk->name, len); + return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (nk->name, len); + return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL); } } @@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node) return 0; } - return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20)); + return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20)); } diff --git a/lib/utf16.c b/lib/utf16.c index 238f40a..c0f0b05 100644 --- a/lib/utf16.c +++ b/lib/utf16.c @@ -30,24 +30,21 @@ #include "hivex-internal.h" char * -_hivex_recode (const char *input_encoding, const char *input, size_t input_len, - const char *output_encoding, size_t *output_len) +_hivex_recode (hive_h *h, recode_type t, + const char *input, size_t input_len, size_t *output_len) { - iconv_t ic = iconv_open (output_encoding, input_encoding); - if (ic == (iconv_t) -1) - return NULL; - /* iconv(3) has an insane interface ... */ size_t outalloc = input_len; + iconv_t *ic = _hivex_get_iconv (h, t); again:; size_t inlen = input_len; size_t outlen = outalloc; char *out = malloc (outlen + 1); if (out == NULL) { int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen); if (r == (size_t) -1) { + int err = errno; if (errno == E2BIG) { - int err = errno; /* Reset errno here because we don't want to accidentally * return E2BIG to a library caller. */ - errno = 0; size_t prev = outalloc; /* Try again with a larger output buffer. */ free (out); outalloc *= 2; if (outalloc < prev) { - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, } else { /* Else some conversion failure, eg. EILSEQ, EINVAL. */ - int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); free (out); errno = err; return NULL; } } + _hivex_release_iconv (h, t); *outp = '\0'; - iconv_close (ic); if (output_len != NULL) *output_len = outp - out; - return out; } @@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, * storing in the hive file, as needed. */ char* -_hivex_encode_string(const char *str, size_t *size, int *utf16) +_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16) { char* outstr; *utf16 = 0; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "LATIN1", size); + outstr = _hivex_recode (h, utf8_to_latin1, + str, strlen(str), size); if (outstr != NULL) return outstr; *utf16 = 1; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "UTF-16LE", size); + outstr = _hivex_recode (h, utf8_to_utf16le, + str, strlen(str), size); return outstr; } @@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len) } size_t -_hivex_utf8_strlen (const char* str, size_t len, int utf16) +_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16) { - const char *encoding = utf16 ? "UTF-16LE" : "LATIN1"; + recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8; size_t ret = 0; - char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret); + char *buf = _hivex_recode (h, t, str, len, &ret); free(buf); return ret; } diff --git a/lib/value.c b/lib/value.c index 2dfe006..3257b53 100644 --- a/lib/value.c +++ b/lib/value.c @@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value) SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len); return 0; } - return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01)); + return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01)); } char * @@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value) return NULL; } if (flags & 0x01) { - return _hivex_windows_latin1_to_utf8 (vk->name, len); + return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (vk->name, len); + return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL); } } @@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value) if (slen < len) len = slen; - char *ret = _hivex_windows_utf16_to_utf8 (data, len); + char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL); free (data); if (ret == NULL) return NULL; @@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value) } ret = ret2; - ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen); + ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL); ret[nr_strings] = NULL; if (ret[nr_strings-1] == NULL) { _hivex_free_strings (ret); diff --git a/lib/write.c b/lib/write.c index 33b64e4..70105c9 100644 --- a/lib/write.c +++ b/lib/write.c @@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) size_t recoded_name_len; int use_utf16 = 0; char *recoded_name - _hivex_encode_string (name, &recoded_name_len, &use_utf16); + _hivex_encode_string (h, name, &recoded_name_len, &use_utf16); if (recoded_name == NULL) { SET_ERRNO (EINVAL, "malformed name"); return 0; @@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node, static const char vk_id[2] = { 'v', 'k' }; size_t recoded_name_len; int use_utf16; - char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len, + char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len, &use_utf16); seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len; size_t vk_offs = allocate_block (h, seg_len, vk_id); diff --git a/m4/.gitignore b/m4/.gitignore index 05ca27c..a19035c 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -138,3 +138,5 @@ /xalloc.m4 /xsize.m4 /xstrtol.m4 +/thread.m4 +/yield.m4 -- 2.11.0
Seemingly Similar Threads
- [PATCH] Add a cache for iconv_t handles to hive_t
- [PATCH] Add a cache for iconv_t handles to hive_t
- [PATCH 1/3] lib: Further generalize iconv wrapper function.
- [PATCH 3/3] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names
- [PATCH 3/3, take 2] lib: Add support for creating nodes (keys) and values with UTF-16LE-encoded names