Richard W.M. Jones
2010-Jan-19 13:37 UTC
[Libguestfs] [PATCH 0/7] Prepare for adding write support to hivex (windows registry) library
This series of patches prepares the way for a later series which will add write support for Windows Registry 'hive' files to our 'hivex' mini library. About hivex: http://libguestfs.org/hivex.3.html About the Windows Registry: Forget it - there are no reliable references. However the Wikipedia page explains some of the top-level concepts: https://secure.wikimedia.org/wikipedia/en/wiki/Windows_Registry And this page explains the format, although it is frequently inaccurate (just less so than other documents): http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf These patches are ready for review right now. I'll post the second series which adds write support later this week. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming blog: http://rwmj.wordpress.com Fedora now supports 80 OCaml packages (the OPEN alternative to F#) http://cocan.org/getting_started_with_ocaml_on_red_hat_and_fedora
Richard W.M. Jones
2010-Jan-19 13:39 UTC
[Libguestfs] [PATCH 1/7] hivex: Store filename in hive handle.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://et.redhat.com/~rjones/libguestfs/ See what it can do: http://et.redhat.com/~rjones/libguestfs/recipes.html -------------- next part -------------->From 8195635932637f863beb6cb8af6c2cf0860ea9d5 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 10:58:06 +0000 Subject: [PATCH 1/7] hivex: Store filename in hive handle. --- hivex/hivex.c | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 6129017..a760300 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -89,6 +89,7 @@ static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len); struct hive_h { + char *filename; int fd; size_t size; int msglvl; @@ -281,6 +282,10 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 2) fprintf (stderr, "hivex_open: created handle %p\n", h); + h->filename = strdup (filename); + if (h->filename == NULL) + goto error; + h->fd = open (filename, O_RDONLY); if (h->fd == -1) goto error; @@ -482,6 +487,7 @@ hivex_open (const char *filename, int flags) munmap (h->addr, h->size); if (h->fd >= 0) close (h->fd); + free (h->filename); free (h); } errno = err; @@ -496,6 +502,7 @@ hivex_close (hive_h *h) free (h->bitmap); munmap (h->addr, h->size); r = close (h->fd); + free (h->filename); free (h); return r; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:39 UTC
[Libguestfs] [PATCH 2/7] hivex: Add HIVEX_OPEN_WRITE flag to allow hive to be opened for writing.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------- From cd6838e77a7c615689ec1b872212121d9622278a Mon Sep 17 00:00:00 2001 From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 11:08:56 +0000 Subject: [PATCH 2/7] hivex: Add HIVEX_OPEN_WRITE flag to allow hive to be opened for writing. If this flag is omitted (as is the case for all existing callers) then the hive is still opened read-only. We add a 'writable' flag to the hive handle, and we change the way that the hive file (data) is stored. The data is still mmapped if the file is opened read-only, since that is more efficient and allows us to handle larger hives. However if we need to write to the file then we have to read it all into memory, since if we have to extend the file we need to realloc that data. Note the manpage section L</WRITING TO HIVE FILES> comes in a later commit. 
--- bootstrap | 1 + hivex/Makefile.am | 10 +++++----- hivex/README | 3 --- hivex/hivex.c | 44 ++++++++++++++++++++++++++++++++++---------- hivex/hivex.h | 4 +++- hivex/hivex.pod | 15 +++++++++++---- m4/.gitignore | 4 ++++ 7 files changed, 58 insertions(+), 23 deletions(-) diff --git a/bootstrap b/bootstrap index 6007e59..32e91f0 100755 --- a/bootstrap +++ b/bootstrap @@ -60,6 +60,7 @@ modules=' arpa_inet c-ctype closeout +full-read gitlog-to-changelog gnu-make gnumakefile diff --git a/hivex/Makefile.am b/hivex/Makefile.am index b73aa2f..a2be7e3 100644 --- a/hivex/Makefile.am +++ b/hivex/Makefile.am @@ -23,16 +23,16 @@ libhivex_la_SOURCES = \ hivex.c \ hivex.h -libhivex_la_LDFLAGS = -version-info 0:0:0 -libhivex_la_CFLAGS = \ - $(WARN_CFLAGS) $(WERROR_CFLAGS) +libhivex_la_LDFLAGS = -version-info 0:0:0 $(LTLIBINTL) $(LTLIBTHREAD) +libhivex_la_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) +libhivex_la_CPPFLAGS = -I$(top_srcdir)/gnulib/lib bin_PROGRAMS = hivexml hivexget hivexml_SOURCES = \ hivexml.c -hivexml_LDADD = libhivex.la $(LIBXML2_LIBS) +hivexml_LDADD = libhivex.la $(LIBXML2_LIBS) ../gnulib/lib/libgnu.la hivexml_CFLAGS = \ $(LIBXML2_CFLAGS) \ $(WARN_CFLAGS) $(WERROR_CFLAGS) @@ -40,7 +40,7 @@ hivexml_CFLAGS = \ hivexget_SOURCES = \ hivexget.c -hivexget_LDADD = libhivex.la +hivexget_LDADD = libhivex.la ../gnulib/lib/libgnu.la hivexget_CFLAGS = \ $(WARN_CFLAGS) $(WERROR_CFLAGS) diff --git a/hivex/README b/hivex/README index 5e7d21f..583d351 100644 --- a/hivex/README +++ b/hivex/README @@ -5,9 +5,6 @@ Copyright (C) 2009-2010 Red Hat Inc. This is a self-contained library for reading Windows Registry "hive" binary files. -It is totally dedicated to reading the files and doesn't deal with -writing or modifying them in any way. - Unlike many other tools in this area, it doesn't use the textual .REG format for output, because parsing that is as much trouble as parsing the original binary format. 
Instead it makes the file available diff --git a/hivex/hivex.c b/hivex/hivex.c index a760300..849049c 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -40,6 +40,12 @@ #include <byteswap.h> #endif +#include "full-read.h" + +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + #define STREQ(a,b) (strcmp((a),(b)) == 0) #define STRCASEEQ(a,b) (strcasecmp((a),(b)) == 0) //#define STRNEQ(a,b) (strcmp((a),(b)) != 0) @@ -93,8 +99,9 @@ struct hive_h { int fd; size_t size; int msglvl; + int writable; - /* Memory-mapped (readonly) registry file. */ + /* Registry file, memory mapped if read-only, or malloc'd if writing. */ union { char *addr; struct ntreg_header *hdr; @@ -282,11 +289,12 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 2) fprintf (stderr, "hivex_open: created handle %p\n", h); + h->writable = !!(flags & HIVEX_OPEN_WRITE); h->filename = strdup (filename); if (h->filename == NULL) goto error; - h->fd = open (filename, O_RDONLY); + h->fd = open (filename, O_RDONLY | O_CLOEXEC); if (h->fd == -1) goto error; @@ -296,12 +304,21 @@ hivex_open (const char *filename, int flags) h->size = statbuf.st_size; - h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0); - if (h->addr == MAP_FAILED) - goto error; + if (!h->writable) { + h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0); + if (h->addr == MAP_FAILED) + goto error; - if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr); + if (h->msglvl >= 2) + fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr); + } else { + h->addr = malloc (h->size); + if (h->addr == NULL) + goto error; + + if (full_read (h->fd, h->addr, h->size) < h->size) + goto error; + } /* Check header. 
*/ if (h->hdr->magic[0] != 'r' || @@ -483,8 +500,12 @@ hivex_open (const char *filename, int flags) int err = errno; if (h) { free (h->bitmap); - if (h->addr && h->size && h->addr != MAP_FAILED) - munmap (h->addr, h->size); + if (h->addr && h->size && h->addr != MAP_FAILED) { + if (!h->writable) + munmap (h->addr, h->size); + else + free (h->addr); + } if (h->fd >= 0) close (h->fd); free (h->filename); @@ -500,7 +521,10 @@ hivex_close (hive_h *h) int r; free (h->bitmap); - munmap (h->addr, h->size); + if (!h->writable) + munmap (h->addr, h->size); + else + free (h->addr); r = close (h->fd); free (h->filename); free (h); diff --git a/hivex/hivex.h b/hivex/hivex.h index 14bdcc5..b0c1c3b 100644 --- a/hivex/hivex.h +++ b/hivex/hivex.h @@ -69,9 +69,11 @@ enum hive_type { typedef enum hive_type hive_type; +/* Bitmask of flags passed to hivex_open. */ #define HIVEX_OPEN_VERBOSE 1 #define HIVEX_OPEN_DEBUG 2 -#define HIVEX_OPEN_MSGLVL_MASK 3 +#define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) +#define HIVEX_OPEN_WRITE 4 extern hive_h *hivex_open (const char *filename, int flags); extern int hivex_close (hive_h *h); diff --git a/hivex/hivex.pod b/hivex/hivex.pod index 0cc91af..0de4d54 100644 --- a/hivex/hivex.pod +++ b/hivex/hivex.pod @@ -13,8 +13,7 @@ hivex - Windows Registry "hive" extraction library libhivex is a library for extracting the contents of Windows Registry "hive" files. It is designed to be secure against buggy or malicious -registry files, and to have limited functionality (writing or -modifying these files is not in the scope of this library). +registry files. Unlike many other tools in this area, it doesn't use the textual .REG format for output, because parsing that is as much trouble as parsing @@ -32,8 +31,7 @@ L<hivexget(1)>). Opens the hive named C<filename> for reading. Flags is an ORed list of the open flags (or C<0> if you don't -want to pass any flags). Currently the only -flags defined are: +want to pass any flags). 
These flags are defined: =over 4 @@ -49,6 +47,12 @@ itself. This is also selected if the C<HIVEX_DEBUG> environment variable is set to 1. +=item HIVEX_OPEN_WRITE + +Open the hive for writing. If omitted, the hive is read-only. + +See L</WRITING TO HIVE FILES>. + =back C<hivex_open> returns a hive handle. On error this returns NULL and @@ -58,6 +62,9 @@ sets C<errno> to indicate the error. Close a hive handle and free all associated resources. +Note that any uncommitted writes are I<not> committed by this call, +but instead are lost. See L</WRITING TO HIVE FILES>. + Returns 0 on success. On error this returns -1 and sets errno. =back diff --git a/m4/.gitignore b/m4/.gitignore index 788ec87..9ce838b 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -130,3 +130,7 @@ xsize.m4 /yield.m4 /fcntl-o.m4 /warn-on-use.m4 +/safe-read.m4 +/safe-write.m4 +/ssize_t.m4 +/write.m4 -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:41 UTC
[Libguestfs] [PATCH 3/7] hivex: Collect more statistics about registries.
Note that currently we store the statistics in the hive_h handle, although we never use them from there, so this patch also deletes those fields. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 2dfd3c62821039e0200933e4f034edde768add2c Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 14:14:40 +0000 Subject: [PATCH 3/7] hivex: Collect more statistics about registries. --- hivex/hivex.c | 51 +++++++++++++++++++++++++++++++-------------------- 1 files changed, 31 insertions(+), 20 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 849049c..365f328 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -127,12 +127,6 @@ struct hive_h { /* Fields from the header, extracted from little-endianness hell. */ size_t rootoffs; /* Root key offset (always an nk-block). */ size_t endpages; /* Offset of end of pages. */ - - /* Stats. */ - size_t pages; /* Number of hbin pages read. */ - size_t blocks; /* Total number of blocks found. */ - size_t used_blocks; /* Total number of used blocks found. */ - size_t used_size; /* Total size (bytes) of used blocks. */ }; /* NB. All fields are little endian. */ @@ -393,6 +387,14 @@ hivex_open (const char *filename, int flags) */ int seen_root_block = 0, bad_root_block = 0; + /* Collect some stats. */ + size_t pages = 0; /* Number of hbin pages read. */ + size_t smallest_page = SIZE_MAX, largest_page = 0; + size_t blocks = 0; /* Total number of blocks found. */ + size_t smallest_block = SIZE_MAX, largest_block = 0, blocks_bytes = 0; + size_t used_blocks = 0; /* Total number of used blocks found. */ + size_t used_size = 0; /* Total size (bytes) of used blocks. */ + /* Read the pages and blocks. 
The aim here is to be robust against * corrupt or malicious registries. So we make sure the loops * always make forward progress. We add the address of each block @@ -411,14 +413,17 @@ hivex_open (const char *filename, int flags) page->magic[2] != 'i' || page->magic[3] != 'n') { fprintf (stderr, "hivex: %s: trailing garbage at end of file (at 0x%zx, after %zu pages)\n", - filename, off, h->pages); + filename, off, pages); errno = ENOTSUP; goto error; } + size_t page_size = le32toh (page->offset_next); if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: page at 0x%zx\n", off); - h->pages++; + fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size); + pages++; + if (page_size < smallest_page) smallest_page = page_size; + if (page_size > largest_page) largest_page = page_size; if (le32toh (page->offset_next) <= sizeof (struct ntreg_hbin_page) || (le32toh (page->offset_next) & 3) != 0) { @@ -431,11 +436,11 @@ hivex_open (const char *filename, int flags) /* Read the blocks in this page. */ size_t blkoff; struct ntreg_hbin_block *block; - int32_t seg_len; + size_t seg_len; for (blkoff = off + 0x20; blkoff < off + le32toh (page->offset_next); blkoff += seg_len) { - h->blocks++; + blocks++; int is_root = blkoff == h->rootoffs; if (is_root) @@ -452,16 +457,20 @@ hivex_open (const char *filename, int flags) } if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx%s\n", + fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx size %zu%s\n", used ? "used" : "free", block->id[0], block->id[1], blkoff, - is_root ? " (root)" : ""); + seg_len, is_root ? " (root)" : ""); + + blocks_bytes += seg_len; + if (seg_len < smallest_block) smallest_block = seg_len; + if (seg_len > largest_block) largest_block = seg_len; if (is_root && !used) bad_root_block = 1; if (used) { - h->used_blocks++; - h->used_size += seg_len; + used_blocks++; + used_size += seg_len; /* Root block must be an nk-block. 
*/ if (is_root && (block->id[0] != 'n' || block->id[1] != 'k')) @@ -488,11 +497,13 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 1) fprintf (stderr, "hivex_open: successfully read Windows Registry hive file:\n" - " pages: %zu\n" - " blocks: %zu\n" - " blocks used: %zu\n" - " bytes used: %zu\n", - h->pages, h->blocks, h->used_blocks, h->used_size); + " pages: %zu [sml: %zu, lge: %zu]\n" + " blocks: %zu [sml: %zu, avg: %zu, lge: %zu]\n" + " blocks used: %zu\n" + " bytes used: %zu\n", + pages, smallest_page, largest_page, + blocks, smallest_block, blocks_bytes / blocks, largest_block, + used_blocks, used_size); return h; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:41 UTC
[Libguestfs] [PATCH 4/7] hivex: page 'offset_next' field is really 'page_size'.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From f927a65b2163723474ef5878f61d41910b7d3404 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 15:24:16 +0000 Subject: [PATCH 4/7] hivex: page 'offset_next' field is really 'page_size'. The documentation, as usual, is contradictory. However this field is definitely the page size in all observed registries. Furthermore the following field marked 'unknown' is always zero, although this contradicts what the sentinelchicken.com paper says. --- hivex/hivex.c | 16 ++++++++-------- 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 365f328..cb6c772 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -168,7 +168,7 @@ struct ntreg_header { struct ntreg_hbin_page { char magic[4]; /* "hbin" */ uint32_t offset_first; /* offset from 1st block */ - uint32_t offset_next; /* offset of next (relative to this) */ + uint32_t page_size; /* size of this page (multiple of 4KB) */ char unknown[20]; /* Linked list of blocks follows here. 
*/ } __attribute__((__packed__)); @@ -403,7 +403,7 @@ hivex_open (const char *filename, int flags) */ size_t off; struct ntreg_hbin_page *page; - for (off = 0x1000; off < h->size; off += le32toh (page->offset_next)) { + for (off = 0x1000; off < h->size; off += le32toh (page->page_size)) { if (off >= h->endpages) break; @@ -418,17 +418,17 @@ hivex_open (const char *filename, int flags) goto error; } - size_t page_size = le32toh (page->offset_next); + size_t page_size = le32toh (page->page_size); if (h->msglvl >= 2) fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size); pages++; if (page_size < smallest_page) smallest_page = page_size; if (page_size > largest_page) largest_page = page_size; - if (le32toh (page->offset_next) <= sizeof (struct ntreg_hbin_page) || - (le32toh (page->offset_next) & 3) != 0) { - fprintf (stderr, "hivex: %s: pagesize %d at %zu, bad registry\n", - filename, le32toh (page->offset_next), off); + if (page_size <= sizeof (struct ntreg_hbin_page) || + (page_size & 0x0fff) != 0) { + fprintf (stderr, "hivex: %s: page size %zu at 0x%zx, bad registry\n", + filename, page_size, off); errno = ENOTSUP; goto error; } @@ -438,7 +438,7 @@ hivex_open (const char *filename, int flags) struct ntreg_hbin_block *block; size_t seg_len; for (blkoff = off + 0x20; - blkoff < off + le32toh (page->offset_next); + blkoff < off + page_size; blkoff += seg_len) { blocks++; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:42 UTC
[Libguestfs] [PATCH 5/7] hivex: Move header checksum code into a function.
This is just code motion. We will reuse this function in a later patch in the 'write' series. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into Xen guests. http://et.redhat.com/~rjones/virt-p2v -------------- next part -------------->From 8b8885c16fba2c3aa51f5f2f8ea29f23d1a025c9 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 17:56:13 +0000 Subject: [PATCH 5/7] hivex: Move header checksum code into a function. This function can be reused later. --- hivex/hivex.c | 24 ++++++++++++++++-------- 1 files changed, 16 insertions(+), 8 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index cb6c772..cb6fc62 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -262,6 +262,21 @@ struct ntreg_vk_record { char name[1]; /* key name follows here */ } __attribute__((__packed__)); +static uint32_t +header_checksum (hive_h *h) +{ + uint32_t *daddr = (uint32_t *) h->addr; + size_t i; + uint32_t sum = 0; + + for (i = 0; i < 0x1fc / 4; ++i) { + sum ^= le32toh (*daddr); + daddr++; + } + + return sum; +} + hive_h * hivex_open (const char *filename, int flags) { @@ -340,14 +355,7 @@ hivex_open (const char *filename, int flags) goto error; /* Header checksum. */ - uint32_t *daddr = (uint32_t *) h->addr; - size_t i; - uint32_t sum = 0; - for (i = 0; i < 0x1fc / 4; ++i) { - sum ^= le32toh (*daddr); - daddr++; - } - + uint32_t sum = header_checksum (h); if (sum != le32toh (h->hdr->csum)) { fprintf (stderr, "hivex: %s: bad checksum in hive header\n", filename); errno = EINVAL; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:43 UTC
[Libguestfs] [PATCH 6/7] hivex: Add value_any callback to the visitor.
This patch should not change behaviour for existing clients. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From 2775c947aa502a4ef5023315294147018778ae2d Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 10:06:00 +0000 Subject: [PATCH 6/7] hivex: Add value_any callback to the visitor. The visitor currently contains lots of value_* callbacks, such as value_string which is called back when the value has type string. This is fine but it makes it complicated to deal with the case where you just want to see 'a value', and don't care about its type. The value_any callback allows visitors to see values generically. --- hivex/hivex.c | 178 +++++++++++++++++++++++++++++-------------------------- hivex/hivex.h | 1 + hivex/hivex.pod | 5 ++ 3 files changed, 101 insertions(+), 83 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index cb6fc62..81d217b 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -1421,114 +1421,126 @@ hivex__visit_node (hive_h *h, hive_node_h node, goto error; } - switch (t) { - case hive_t_none: + if (vtor->value_any) { str = hivex_value_value (h, values[i], &t, &len); if (str == NULL) { ret = skip_bad ? 0 : -1; goto error; } - if (t != hive_t_none) { - ret = skip_bad ? 
0 : -1; - goto error; - } - if (vtor->value_none && - vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1) + if (vtor->value_any (h, opaque, node, values[i], t, len, key, str) == -1) goto error; free (str); str = NULL; - break; - - case hive_t_string: - case hive_t_expand_string: - case hive_t_link: - str = hivex_value_string (h, values[i]); - if (str == NULL) { - if (errno != EILSEQ && errno != EINVAL) { + } + else { + switch (t) { + case hive_t_none: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { ret = skip_bad ? 0 : -1; goto error; } - if (vtor->value_string_invalid_utf16) { - str = hivex_value_value (h, values[i], &t, &len); - if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; + if (t != hive_t_none) { + ret = skip_bad ? 0 : -1; + goto error; } + if (vtor->value_none && + vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; break; - } - if (vtor->value_string && - vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; - - case hive_t_dword: - case hive_t_dword_be: { - int32_t i32 = hivex_value_dword (h, values[i]); - if (vtor->value_dword && - vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1) - goto error; - break; - } - case hive_t_qword: { - int64_t i64 = hivex_value_qword (h, values[i]); - if (vtor->value_qword && - vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1) - goto error; - break; - } + case hive_t_string: + case hive_t_expand_string: + case hive_t_link: + str = hivex_value_string (h, values[i]); + if (str == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 
0 : -1; + goto error; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; + } + if (vtor->value_string && + vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; - case hive_t_binary: - str = hivex_value_value (h, values[i], &t, &len); - if (str == NULL) { - ret = skip_bad ? 0 : -1; - goto error; + case hive_t_dword: + case hive_t_dword_be: { + int32_t i32 = hivex_value_dword (h, values[i]); + if (vtor->value_dword && + vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1) + goto error; + break; } - if (t != hive_t_binary) { - ret = skip_bad ? 0 : -1; - goto error; + + case hive_t_qword: { + int64_t i64 = hivex_value_qword (h, values[i]); + if (vtor->value_qword && + vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1) + goto error; + break; } - if (vtor->value_binary && - vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; - case hive_t_multiple_strings: - strs = hivex_value_multiple_strings (h, values[i]); - if (strs == NULL) { - if (errno != EILSEQ && errno != EINVAL) { + case hive_t_binary: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { ret = skip_bad ? 0 : -1; goto error; } - if (vtor->value_string_invalid_utf16) { - str = hivex_value_value (h, values[i], &t, &len); - if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + if (t != hive_t_binary) { + ret = skip_bad ? 
0 : -1; + goto error; + } + if (vtor->value_binary && + vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + + case hive_t_multiple_strings: + strs = hivex_value_multiple_strings (h, values[i]); + if (strs == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 0 : -1; goto error; - free (str); str = NULL; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; } + if (vtor->value_multiple_strings && + vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1) + goto error; + free_strings (strs); strs = NULL; break; - } - if (vtor->value_multiple_strings && - vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1) - goto error; - free_strings (strs); strs = NULL; - break; - case hive_t_resource_list: - case hive_t_full_resource_description: - case hive_t_resource_requirements_list: - default: - str = hivex_value_value (h, values[i], &t, &len); - if (str == NULL) { - ret = skip_bad ? 0 : -1; - goto error; + case hive_t_resource_list: + case hive_t_full_resource_description: + case hive_t_resource_requirements_list: + default: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 
0 : -1; + goto error; + } + if (vtor->value_other && + vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; } - if (vtor->value_other && - vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; } free (key); key = NULL; diff --git a/hivex/hivex.h b/hivex/hivex.h index b0c1c3b..56718b4 100644 --- a/hivex/hivex.h +++ b/hivex/hivex.h @@ -102,6 +102,7 @@ struct hivex_visitor { int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); }; #define HIVEX_VISIT_SKIP_BAD 1 diff --git a/hivex/hivex.pod b/hivex/hivex.pod index 0de4d54..5a58144 100644 --- a/hivex/hivex.pod +++ b/hivex/hivex.pod @@ -288,6 +288,11 @@ all, set the function pointer to NULL. hive_type t, size_t len, const char *key, const char *value); int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + /* If value_any callback is not NULL, then the other value_* + * callbacks are not used, and value_any is called on all values. + */ + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); }; =over 4 -- 1.6.5.2
Richard W.M. Jones
2010-Jan-19 13:44 UTC
[Libguestfs] [PATCH 7/7] hivex: Modify children/values functions to return intermediate blocks.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From 2743ba2a3c78c687e81397a2c18487651e8d846c Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 12:22:10 +0000 Subject: [PATCH 7/7] hivex: Modify children/values functions to return intermediate blocks. Modify the functions that return child subnodes and values so they can also be used to return a list of the intermediate blocks. This is so we can delete those intermediate blocks (in a later commit). We also introduce an offset_list structure which is used for collecting lists of offsets, ie. lists of nodes, values or blocks. Note that this commit should not change the semantics of the code. --- hivex/hivex.c | 238 ++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 168 insertions(+), 70 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 81d217b..dfac896 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -644,34 +644,97 @@ hivex_node_classname (hive_h *h, hive_node_h node) } #endif -hive_node_h * -hivex_node_children (hive_h *h, hive_node_h node) +/* Structure for returning 0-terminated lists of offsets (nodes, + * values, etc). + */ +struct offset_list { + size_t *offsets; + size_t len; + size_t alloc; +}; + +static void +init_offset_list (struct offset_list *list) +{ + list->len = 0; + list->alloc = 0; + list->offsets = NULL; +} + +#define INIT_OFFSET_LIST(name) \ + struct offset_list name; \ + init_offset_list (&name) + +/* Preallocates the offset_list, but doesn't make the contents longer. 
*/ +static int +grow_offset_list (struct offset_list *list, size_t alloc) +{ + assert (alloc >= list->len); + size_t *p = realloc (list->offsets, alloc * sizeof (size_t)); + if (p == NULL) + return -1; + list->offsets = p; + list->alloc = alloc; + return 0; +} + +static int +add_to_offset_list (struct offset_list *list, size_t offset) +{ + if (list->len >= list->alloc) { + if (grow_offset_list (list, list->alloc ? list->alloc * 2 : 4) == -1) + return -1; + } + list->offsets[list->len] = offset; + list->len++; + return 0; +} + +static void +free_offset_list (struct offset_list *list) +{ + free (list->offsets); +} + +static size_t * +return_offset_list (struct offset_list *list) +{ + if (add_to_offset_list (list, 0) == -1) + return NULL; + return list->offsets; /* caller frees */ +} + +/* Iterate over children, returning child nodes and intermediate blocks. */ +static int +get_children (hive_h *h, hive_node_h node, + hive_node_h **children_ret, size_t **blocks_ret) { if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { errno = EINVAL; - return NULL; + return -1; } struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys); + INIT_OFFSET_LIST (children); + INIT_OFFSET_LIST (blocks); + /* Deal with the common "no subkeys" case quickly. */ - hive_node_h *ret; - if (nr_subkeys_in_nk == 0) { - ret = malloc (sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - ret[0] = 0; - return ret; - } + if (nr_subkeys_in_nk == 0) + goto ok; /* Arbitrarily limit the number of subkeys we will ever deal with. */ if (nr_subkeys_in_nk > 1000000) { errno = ERANGE; - return NULL; + goto error; } + /* Preallocate space for the children. */ + if (grow_offset_list (&children, nr_subkeys_in_nk) == -1) + goto error; + /* The subkey_lf field can point either to an lf-record, which is * the common case, or if there are lots of subkeys, to an * ri-record. 
@@ -683,9 +746,12 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because subkey_lf is not a valid block (%zu)\n", subkey_lf); errno = EFAULT; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, subkey_lf) == -1) + goto error; + struct ntreg_hbin_block *block = (struct ntreg_hbin_block *) (h->addr + subkey_lf); @@ -706,7 +772,7 @@ hivex_node_children (hive_h *h, hive_node_h node) if (nr_subkeys_in_nk != nr_subkeys_in_lf) { errno = ENOTSUP; - return NULL; + goto error; } size_t len = block_len (h, subkey_lf, NULL); @@ -715,16 +781,9 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because too many subkeys (%zu, %zu)\n", nr_subkeys_in_lf, len); errno = EFAULT; - return NULL; + goto error; } - /* Allocate space for the returned values. Note that - * nr_subkeys_in_lf is limited to a 16 bit value. - */ - ret = malloc ((1 + nr_subkeys_in_lf) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - size_t i; for (i = 0; i < nr_subkeys_in_lf; ++i) { hive_node_h subkey = lf->keys[i].offset; @@ -734,13 +793,12 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because subkey is not a valid block (0x%zx)\n", subkey); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[i] = subkey; + if (add_to_offset_list (&children, subkey) == -1) + goto error; } - ret[i] = 0; - return ret; + goto ok; } /* Points to ri-record? 
*/ else if (block->id[0] == 'r' && block->id[1] == 'i') { @@ -758,13 +816,16 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n", offset); errno = EFAULT; - return NULL; + goto error; } if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { errno = ENOTSUP; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, offset) == -1) + goto error; + struct ntreg_lf_record *lf = (struct ntreg_lf_record *) (h->addr + offset); @@ -777,17 +838,12 @@ hivex_node_children (hive_h *h, hive_node_h node) if (nr_subkeys_in_nk != count) { errno = ENOTSUP; - return NULL; + goto error; } /* Copy list of children. Note nr_subkeys_in_nk is limited to * something reasonable above. */ - ret = malloc ((1 + nr_subkeys_in_nk) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - - count = 0; for (i = 0; i < nr_offsets; ++i) { hive_node_h offset = ri->offset[i]; offset += 0x1000; @@ -796,11 +852,11 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n", offset); errno = EFAULT; - return NULL; + goto error; } if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { errno = ENOTSUP; - return NULL; + goto error; } struct ntreg_lf_record *lf = @@ -815,20 +871,40 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because indirect subkey is not a valid block (0x%zx)\n", subkey); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[count++] = subkey; + if (add_to_offset_list (&children, subkey) == -1) + goto error; } } - ret[count] = 0; - - return ret; + goto ok; } - else { - errno = ENOTSUP; + /* else not supported, set errno and fall through */ + errno = ENOTSUP; + error: + free_offset_list (&children); + free_offset_list (&blocks); + return -1; + + ok: + *children_ret = 
return_offset_list (&children); + *blocks_ret = return_offset_list (&blocks); + if (!*children_ret || !*blocks_ret) + goto error; + return 0; +} + +hive_node_h * +hivex_node_children (hive_h *h, hive_node_h node) +{ + hive_node_h *children; + size_t *blocks; + + if (get_children (h, node, &children, &blocks) == -1) return NULL; - } + + free (blocks); + return children; } /* Very inefficient, but at least having a separate API call @@ -883,12 +959,13 @@ hivex_node_parent (hive_h *h, hive_node_h node) return ret; } -hive_value_h * -hivex_node_values (hive_h *h, hive_node_h node) +static int +get_values (hive_h *h, hive_node_h node, + hive_value_h **values_ret, size_t **blocks_ret) { if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { errno = EINVAL; - return 0; + return -1; } struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); @@ -898,22 +975,23 @@ hivex_node_values (hive_h *h, hive_node_h node) if (h->msglvl >= 2) fprintf (stderr, "hivex_node_values: nr_values = %zu\n", nr_values); + INIT_OFFSET_LIST (values); + INIT_OFFSET_LIST (blocks); + /* Deal with the common "no values" case quickly. */ - hive_node_h *ret; - if (nr_values == 0) { - ret = malloc (sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - ret[0] = 0; - return ret; - } + if (nr_values == 0) + goto ok; /* Arbitrarily limit the number of values we will ever deal with. */ if (nr_values > 100000) { errno = ERANGE; - return NULL; + goto error; } + /* Preallocate space for the values. */ + if (grow_offset_list (&values, nr_values) == -1) + goto error; + /* Get the value list and check it looks reasonable. 
*/ size_t vlist_offset = le32toh (nk->vallist); vlist_offset += 0x1000; @@ -922,9 +1000,12 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value list is not a valid block (0x%zx)\n", vlist_offset); errno = EFAULT; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, vlist_offset) == -1) + goto error; + struct ntreg_value_list *vlist = (struct ntreg_value_list *) (h->addr + vlist_offset); @@ -934,14 +1015,9 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value list is too long (%zu, %zu)\n", nr_values, len); errno = EFAULT; - return NULL; + goto error; } - /* Allocate return array and copy values in. */ - ret = malloc ((1 + nr_values) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - size_t i; for (i = 0; i < nr_values; ++i) { hive_node_h value = vlist->offset[i]; @@ -951,14 +1027,36 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value is not a valid block (0x%zx)\n", value); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[i] = value; + if (add_to_offset_list (&values, value) == -1) + goto error; } - ret[i] = 0; - return ret; + ok: + *values_ret = return_offset_list (&values); + *blocks_ret = return_offset_list (&blocks); + if (!*values_ret || !*blocks_ret) + goto error; + return 0; + + error: + free_offset_list (&values); + free_offset_list (&blocks); + return -1; +} + +hive_value_h * +hivex_node_values (hive_h *h, hive_node_h node) +{ + hive_value_h *values; + size_t *blocks; + + if (get_values (h, node, &values, &blocks) == -1) + return NULL; + + free (blocks); + return values; } /* Very inefficient, but at least having a separate API call -- 1.6.5.2
Apparently Analogous Threads
- [PATCH 0/13 v2] Prepare for adding write support to hivex (Windows registry) library
- [hivex PATCH 0/5] Fix various uninitialized data problems in hivex.
- [hivex] [PATCH 1/1] hivexml: Change value type output to standard names
- [PATCH] hivex: add hivex_set_value api call and perl bindings, tests
- [PATCH libguestfs] hivex: fail upon integer overflow