Richard W.M. Jones
2010-Jan-28 10:14 UTC
[Libguestfs] [PATCH 0/13 v2] Prepare for adding write support to hivex (Windows registry) library
This series of patches, now up to 13 parts, contains lots of bug fixes and groundwork required before we add write support to the hivex library. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming blog: http://rwmj.wordpress.com Fedora now supports 80 OCaml packages (the OPEN alternative to F#) http://cocan.org/getting_started_with_ocaml_on_red_hat_and_fedora
Richard W.M. Jones
2010-Jan-28 10:16 UTC
[Libguestfs] [PATCH 1/13] hivex: Store filename in hive handle.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into Xen guests. http://et.redhat.com/~rjones/virt-p2v -------------- next part -------------->From 6a69ac7b31abc628f6fe52c8a241b699d3f98c07 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 10:58:06 +0000 Subject: [PATCH 01/13] hivex: Store filename in hive handle. --- hivex/hivex.c | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 6129017..a760300 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -89,6 +89,7 @@ static char *windows_utf16_to_utf8 (/* const */ char *input, size_t len); struct hive_h { + char *filename; int fd; size_t size; int msglvl; @@ -281,6 +282,10 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 2) fprintf (stderr, "hivex_open: created handle %p\n", h); + h->filename = strdup (filename); + if (h->filename == NULL) + goto error; + h->fd = open (filename, O_RDONLY); if (h->fd == -1) goto error; @@ -482,6 +487,7 @@ hivex_open (const char *filename, int flags) munmap (h->addr, h->size); if (h->fd >= 0) close (h->fd); + free (h->filename); free (h); } errno = err; @@ -496,6 +502,7 @@ hivex_close (hive_h *h) free (h->bitmap); munmap (h->addr, h->size); r = close (h->fd); + free (h->filename); free (h); return r; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:16 UTC
[Libguestfs] [PATCH 2/13] hivex: Add HIVEX_OPEN_WRITE flag to allow hive to be opened for writing.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 699d7fa711968a8fd3904587d2bf4188ac32681c Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 11:08:56 +0000 Subject: [PATCH 02/13] hivex: Add HIVEX_OPEN_WRITE flag to allow hive to be opened for writing. If this flag is omitted (as in the case for all existing callers) then the hive is still opened read-only. We add a 'writable' flag to the hive handle, and we change the way that the hive file (data) is stored. The data is still mmapped if the file is opened read-only, since that is more efficient and allows us to handle larger hives. However if we need to write to the file then we have to read it all into memory, since if we had to extend the file we need to realloc that data. Note the manpage section L</WRITING TO HIVE FILES> comes in a later commit. 
--- bootstrap | 1 + hivex/Makefile.am | 10 +++++----- hivex/README | 3 --- hivex/hivex.c | 44 ++++++++++++++++++++++++++++++++++---------- hivex/hivex.h | 4 +++- hivex/hivex.pod | 15 +++++++++++---- 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/bootstrap b/bootstrap index 7010eca..e743a4b 100755 --- a/bootstrap +++ b/bootstrap @@ -60,6 +60,7 @@ modules=' arpa_inet c-ctype closeout +full-read full-write gitlog-to-changelog gnu-make diff --git a/hivex/Makefile.am b/hivex/Makefile.am index b73aa2f..a2be7e3 100644 --- a/hivex/Makefile.am +++ b/hivex/Makefile.am @@ -23,16 +23,16 @@ libhivex_la_SOURCES = \ hivex.c \ hivex.h -libhivex_la_LDFLAGS = -version-info 0:0:0 -libhivex_la_CFLAGS = \ - $(WARN_CFLAGS) $(WERROR_CFLAGS) +libhivex_la_LDFLAGS = -version-info 0:0:0 $(LTLIBINTL) $(LTLIBTHREAD) +libhivex_la_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) +libhivex_la_CPPFLAGS = -I$(top_srcdir)/gnulib/lib bin_PROGRAMS = hivexml hivexget hivexml_SOURCES = \ hivexml.c -hivexml_LDADD = libhivex.la $(LIBXML2_LIBS) +hivexml_LDADD = libhivex.la $(LIBXML2_LIBS) ../gnulib/lib/libgnu.la hivexml_CFLAGS = \ $(LIBXML2_CFLAGS) \ $(WARN_CFLAGS) $(WERROR_CFLAGS) @@ -40,7 +40,7 @@ hivexml_CFLAGS = \ hivexget_SOURCES = \ hivexget.c -hivexget_LDADD = libhivex.la +hivexget_LDADD = libhivex.la ../gnulib/lib/libgnu.la hivexget_CFLAGS = \ $(WARN_CFLAGS) $(WERROR_CFLAGS) diff --git a/hivex/README b/hivex/README index 5e7d21f..583d351 100644 --- a/hivex/README +++ b/hivex/README @@ -5,9 +5,6 @@ Copyright (C) 2009-2010 Red Hat Inc. This is a self-contained library for reading Windows Registry "hive" binary files. -It is totally dedicated to reading the files and doesn't deal with -writing or modifying them in any way. - Unlike many other tools in this area, it doesn't use the textual .REG format for output, because parsing that is as much trouble as parsing the original binary format. 
Instead it makes the file available diff --git a/hivex/hivex.c b/hivex/hivex.c index a760300..849049c 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -40,6 +40,12 @@ #include <byteswap.h> #endif +#include "full-read.h" + +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + #define STREQ(a,b) (strcmp((a),(b)) == 0) #define STRCASEEQ(a,b) (strcasecmp((a),(b)) == 0) //#define STRNEQ(a,b) (strcmp((a),(b)) != 0) @@ -93,8 +99,9 @@ struct hive_h { int fd; size_t size; int msglvl; + int writable; - /* Memory-mapped (readonly) registry file. */ + /* Registry file, memory mapped if read-only, or malloc'd if writing. */ union { char *addr; struct ntreg_header *hdr; @@ -282,11 +289,12 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 2) fprintf (stderr, "hivex_open: created handle %p\n", h); + h->writable = !!(flags & HIVEX_OPEN_WRITE); h->filename = strdup (filename); if (h->filename == NULL) goto error; - h->fd = open (filename, O_RDONLY); + h->fd = open (filename, O_RDONLY | O_CLOEXEC); if (h->fd == -1) goto error; @@ -296,12 +304,21 @@ hivex_open (const char *filename, int flags) h->size = statbuf.st_size; - h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0); - if (h->addr == MAP_FAILED) - goto error; + if (!h->writable) { + h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0); + if (h->addr == MAP_FAILED) + goto error; - if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr); + if (h->msglvl >= 2) + fprintf (stderr, "hivex_open: mapped file at %p\n", h->addr); + } else { + h->addr = malloc (h->size); + if (h->addr == NULL) + goto error; + + if (full_read (h->fd, h->addr, h->size) < h->size) + goto error; + } /* Check header. 
*/ if (h->hdr->magic[0] != 'r' || @@ -483,8 +500,12 @@ hivex_open (const char *filename, int flags) int err = errno; if (h) { free (h->bitmap); - if (h->addr && h->size && h->addr != MAP_FAILED) - munmap (h->addr, h->size); + if (h->addr && h->size && h->addr != MAP_FAILED) { + if (!h->writable) + munmap (h->addr, h->size); + else + free (h->addr); + } if (h->fd >= 0) close (h->fd); free (h->filename); @@ -500,7 +521,10 @@ hivex_close (hive_h *h) int r; free (h->bitmap); - munmap (h->addr, h->size); + if (!h->writable) + munmap (h->addr, h->size); + else + free (h->addr); r = close (h->fd); free (h->filename); free (h); diff --git a/hivex/hivex.h b/hivex/hivex.h index 14bdcc5..b0c1c3b 100644 --- a/hivex/hivex.h +++ b/hivex/hivex.h @@ -69,9 +69,11 @@ enum hive_type { typedef enum hive_type hive_type; +/* Bitmask of flags passed to hivex_open. */ #define HIVEX_OPEN_VERBOSE 1 #define HIVEX_OPEN_DEBUG 2 -#define HIVEX_OPEN_MSGLVL_MASK 3 +#define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) +#define HIVEX_OPEN_WRITE 4 extern hive_h *hivex_open (const char *filename, int flags); extern int hivex_close (hive_h *h); diff --git a/hivex/hivex.pod b/hivex/hivex.pod index 0cc91af..0de4d54 100644 --- a/hivex/hivex.pod +++ b/hivex/hivex.pod @@ -13,8 +13,7 @@ hivex - Windows Registry "hive" extraction library libhivex is a library for extracting the contents of Windows Registry "hive" files. It is designed to be secure against buggy or malicious -registry files, and to have limited functionality (writing or -modifying these files is not in the scope of this library). +registry files. Unlike many other tools in this area, it doesn't use the textual .REG format for output, because parsing that is as much trouble as parsing @@ -32,8 +31,7 @@ L<hivexget(1)>). Opens the hive named C<filename> for reading. Flags is an ORed list of the open flags (or C<0> if you don't -want to pass any flags). Currently the only -flags defined are: +want to pass any flags). 
These flags are defined: =over 4 @@ -49,6 +47,12 @@ itself. This is also selected if the C<HIVEX_DEBUG> environment variable is set to 1. +=item HIVEX_OPEN_WRITE + +Open the hive for writing. If omitted, the hive is read-only. + +See L</WRITING TO HIVE FILES>. + =back C<hivex_open> returns a hive handle. On error this returns NULL and @@ -58,6 +62,9 @@ sets C<errno> to indicate the error. Close a hive handle and free all associated resources. +Note that any uncommitted writes are I<not> committed by this call, +but instead are lost. See L</WRITING TO HIVE FILES>. + Returns 0 on success. On error this returns -1 and sets errno. =back -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:17 UTC
[Libguestfs] [PATCH 3/13] hivex: Collect more statistics about registries.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 297c2213b683b7f9ed05501098a31166d0c62721 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 14:14:40 +0000 Subject: [PATCH 03/13] hivex: Collect more statistics about registries. --- hivex/hivex.c | 51 +++++++++++++++++++++++++++++++-------------------- 1 files changed, 31 insertions(+), 20 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 849049c..365f328 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -127,12 +127,6 @@ struct hive_h { /* Fields from the header, extracted from little-endianness hell. */ size_t rootoffs; /* Root key offset (always an nk-block). */ size_t endpages; /* Offset of end of pages. */ - - /* Stats. */ - size_t pages; /* Number of hbin pages read. */ - size_t blocks; /* Total number of blocks found. */ - size_t used_blocks; /* Total number of used blocks found. */ - size_t used_size; /* Total size (bytes) of used blocks. */ }; /* NB. All fields are little endian. */ @@ -393,6 +387,14 @@ hivex_open (const char *filename, int flags) */ int seen_root_block = 0, bad_root_block = 0; + /* Collect some stats. */ + size_t pages = 0; /* Number of hbin pages read. */ + size_t smallest_page = SIZE_MAX, largest_page = 0; + size_t blocks = 0; /* Total number of blocks found. */ + size_t smallest_block = SIZE_MAX, largest_block = 0, blocks_bytes = 0; + size_t used_blocks = 0; /* Total number of used blocks found. */ + size_t used_size = 0; /* Total size (bytes) of used blocks. */ + /* Read the pages and blocks. The aim here is to be robust against * corrupt or malicious registries. So we make sure the loops * always make forward progress. 
We add the address of each block @@ -411,14 +413,17 @@ hivex_open (const char *filename, int flags) page->magic[2] != 'i' || page->magic[3] != 'n') { fprintf (stderr, "hivex: %s: trailing garbage at end of file (at 0x%zx, after %zu pages)\n", - filename, off, h->pages); + filename, off, pages); errno = ENOTSUP; goto error; } + size_t page_size = le32toh (page->offset_next); if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: page at 0x%zx\n", off); - h->pages++; + fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size); + pages++; + if (page_size < smallest_page) smallest_page = page_size; + if (page_size > largest_page) largest_page = page_size; if (le32toh (page->offset_next) <= sizeof (struct ntreg_hbin_page) || (le32toh (page->offset_next) & 3) != 0) { @@ -431,11 +436,11 @@ hivex_open (const char *filename, int flags) /* Read the blocks in this page. */ size_t blkoff; struct ntreg_hbin_block *block; - int32_t seg_len; + size_t seg_len; for (blkoff = off + 0x20; blkoff < off + le32toh (page->offset_next); blkoff += seg_len) { - h->blocks++; + blocks++; int is_root = blkoff == h->rootoffs; if (is_root) @@ -452,16 +457,20 @@ hivex_open (const char *filename, int flags) } if (h->msglvl >= 2) - fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx%s\n", + fprintf (stderr, "hivex_open: %s block id %d,%d at 0x%zx size %zu%s\n", used ? "used" : "free", block->id[0], block->id[1], blkoff, - is_root ? " (root)" : ""); + seg_len, is_root ? " (root)" : ""); + + blocks_bytes += seg_len; + if (seg_len < smallest_block) smallest_block = seg_len; + if (seg_len > largest_block) largest_block = seg_len; if (is_root && !used) bad_root_block = 1; if (used) { - h->used_blocks++; - h->used_size += seg_len; + used_blocks++; + used_size += seg_len; /* Root block must be an nk-block. 
*/ if (is_root && (block->id[0] != 'n' || block->id[1] != 'k')) @@ -488,11 +497,13 @@ hivex_open (const char *filename, int flags) if (h->msglvl >= 1) fprintf (stderr, "hivex_open: successfully read Windows Registry hive file:\n" - " pages: %zu\n" - " blocks: %zu\n" - " blocks used: %zu\n" - " bytes used: %zu\n", - h->pages, h->blocks, h->used_blocks, h->used_size); + " pages: %zu [sml: %zu, lge: %zu]\n" + " blocks: %zu [sml: %zu, avg: %zu, lge: %zu]\n" + " blocks used: %zu\n" + " bytes used: %zu\n", + pages, smallest_page, largest_page, + blocks, smallest_block, blocks_bytes / blocks, largest_block, + used_blocks, used_size); return h; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:17 UTC
[Libguestfs] [PATCH 4/13] hivex: page 'offset_next' field is really 'page_size'.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From 9bfb450519680670125ac1af78d85d77aa8b8708 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 15:24:16 +0000 Subject: [PATCH 04/13] hivex: page 'offset_next' field is really 'page_size'. The documentation, as usual, is contradictory. However this field is definitely the page size in all observed registries. Furthermore the following field marked 'unknown' is always zero, although this contradicts what the sentinelchicken.com paper says. --- hivex/hivex.c | 16 ++++++++-------- 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 365f328..cb6c772 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -168,7 +168,7 @@ struct ntreg_header { struct ntreg_hbin_page { char magic[4]; /* "hbin" */ uint32_t offset_first; /* offset from 1st block */ - uint32_t offset_next; /* offset of next (relative to this) */ + uint32_t page_size; /* size of this page (multiple of 4KB) */ char unknown[20]; /* Linked list of blocks follows here. 
*/ } __attribute__((__packed__)); @@ -403,7 +403,7 @@ hivex_open (const char *filename, int flags) */ size_t off; struct ntreg_hbin_page *page; - for (off = 0x1000; off < h->size; off += le32toh (page->offset_next)) { + for (off = 0x1000; off < h->size; off += le32toh (page->page_size)) { if (off >= h->endpages) break; @@ -418,17 +418,17 @@ hivex_open (const char *filename, int flags) goto error; } - size_t page_size = le32toh (page->offset_next); + size_t page_size = le32toh (page->page_size); if (h->msglvl >= 2) fprintf (stderr, "hivex_open: page at 0x%zx, size %zu\n", off, page_size); pages++; if (page_size < smallest_page) smallest_page = page_size; if (page_size > largest_page) largest_page = page_size; - if (le32toh (page->offset_next) <= sizeof (struct ntreg_hbin_page) || - (le32toh (page->offset_next) & 3) != 0) { - fprintf (stderr, "hivex: %s: pagesize %d at %zu, bad registry\n", - filename, le32toh (page->offset_next), off); + if (page_size <= sizeof (struct ntreg_hbin_page) || + (page_size & 0x0fff) != 0) { + fprintf (stderr, "hivex: %s: page size %zu at 0x%zx, bad registry\n", + filename, page_size, off); errno = ENOTSUP; goto error; } @@ -438,7 +438,7 @@ hivex_open (const char *filename, int flags) struct ntreg_hbin_block *block; size_t seg_len; for (blkoff = off + 0x20; - blkoff < off + le32toh (page->offset_next); + blkoff < off + page_size; blkoff += seg_len) { blocks++; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:17 UTC
[Libguestfs] [PATCH 5/13] hivex: Move header checksum code into a function.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://et.redhat.com/~rjones/virt-top -------------- next part -------------->From d19d554663a20b40a06e7b126f8110490280ae2b Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 17:56:13 +0000 Subject: [PATCH 05/13] hivex: Move header checksum code into a function. This function can be reused later. --- hivex/hivex.c | 24 ++++++++++++++++-------- 1 files changed, 16 insertions(+), 8 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index cb6c772..cb6fc62 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -262,6 +262,21 @@ struct ntreg_vk_record { char name[1]; /* key name follows here */ } __attribute__((__packed__)); +static uint32_t +header_checksum (hive_h *h) +{ + uint32_t *daddr = (uint32_t *) h->addr; + size_t i; + uint32_t sum = 0; + + for (i = 0; i < 0x1fc / 4; ++i) { + sum ^= le32toh (*daddr); + daddr++; + } + + return sum; +} + hive_h * hivex_open (const char *filename, int flags) { @@ -340,14 +355,7 @@ hivex_open (const char *filename, int flags) goto error; /* Header checksum. */ - uint32_t *daddr = (uint32_t *) h->addr; - size_t i; - uint32_t sum = 0; - for (i = 0; i < 0x1fc / 4; ++i) { - sum ^= le32toh (*daddr); - daddr++; - } - + uint32_t sum = header_checksum (h); if (sum != le32toh (h->hdr->csum)) { fprintf (stderr, "hivex: %s: bad checksum in hive header\n", filename); errno = EINVAL; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:18 UTC
[Libguestfs] [PATCH 6/13] hivex: Add value_any callback to the visitor.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://et.redhat.com/~rjones/virt-top -------------- next part -------------->From 83f27fdbca43dba3706c3c7b21f113dfa2bbf946 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 10:06:00 +0000 Subject: [PATCH 06/13] hivex: Add value_any callback to the visitor. The visitor currently contains lots of value_* callbacks, such as value_string which is called back when the value has type string. This is fine but it makes it complicated to deal with the case where you just want to see 'a value', and don't care about its type. The value_any callback allows visitors to see values generically. --- hivex/hivex.c | 178 +++++++++++++++++++++++++++++-------------------------- hivex/hivex.h | 1 + hivex/hivex.pod | 5 ++ 3 files changed, 101 insertions(+), 83 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index cb6fc62..81d217b 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -1421,114 +1421,126 @@ hivex__visit_node (hive_h *h, hive_node_h node, goto error; } - switch (t) { - case hive_t_none: + if (vtor->value_any) { str = hivex_value_value (h, values[i], &t, &len); if (str == NULL) { ret = skip_bad ? 0 : -1; goto error; } - if (t != hive_t_none) { - ret = skip_bad ? 0 : -1; - goto error; - } - if (vtor->value_none && - vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1) + if (vtor->value_any (h, opaque, node, values[i], t, len, key, str) == -1) goto error; free (str); str = NULL; - break; - - case hive_t_string: - case hive_t_expand_string: - case hive_t_link: - str = hivex_value_string (h, values[i]); - if (str == NULL) { - if (errno != EILSEQ && errno != EINVAL) { + } + else { + switch (t) { + case hive_t_none: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { ret = skip_bad ? 
0 : -1; goto error; } - if (vtor->value_string_invalid_utf16) { - str = hivex_value_value (h, values[i], &t, &len); - if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; + if (t != hive_t_none) { + ret = skip_bad ? 0 : -1; + goto error; } + if (vtor->value_none && + vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; break; - } - if (vtor->value_string && - vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; - case hive_t_dword: - case hive_t_dword_be: { - int32_t i32 = hivex_value_dword (h, values[i]); - if (vtor->value_dword && - vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1) - goto error; - break; - } - - case hive_t_qword: { - int64_t i64 = hivex_value_qword (h, values[i]); - if (vtor->value_qword && - vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1) - goto error; - break; - } + case hive_t_string: + case hive_t_expand_string: + case hive_t_link: + str = hivex_value_string (h, values[i]); + if (str == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; + } + if (vtor->value_string && + vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; - case hive_t_binary: - str = hivex_value_value (h, values[i], &t, &len); - if (str == NULL) { - ret = skip_bad ? 
0 : -1; - goto error; + case hive_t_dword: + case hive_t_dword_be: { + int32_t i32 = hivex_value_dword (h, values[i]); + if (vtor->value_dword && + vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1) + goto error; + break; } - if (t != hive_t_binary) { - ret = skip_bad ? 0 : -1; - goto error; + + case hive_t_qword: { + int64_t i64 = hivex_value_qword (h, values[i]); + if (vtor->value_qword && + vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1) + goto error; + break; } - if (vtor->value_binary && - vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; - case hive_t_multiple_strings: - strs = hivex_value_multiple_strings (h, values[i]); - if (strs == NULL) { - if (errno != EILSEQ && errno != EINVAL) { + case hive_t_binary: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (t != hive_t_binary) { ret = skip_bad ? 0 : -1; goto error; } - if (vtor->value_string_invalid_utf16) { - str = hivex_value_value (h, values[i], &t, &len); - if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + if (vtor->value_binary && + vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + + case hive_t_multiple_strings: + strs = hivex_value_multiple_strings (h, values[i]); + if (strs == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 
0 : -1; goto error; - free (str); str = NULL; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; } + if (vtor->value_multiple_strings && + vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1) + goto error; + free_strings (strs); strs = NULL; break; - } - if (vtor->value_multiple_strings && - vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1) - goto error; - free_strings (strs); strs = NULL; - break; - case hive_t_resource_list: - case hive_t_full_resource_description: - case hive_t_resource_requirements_list: - default: - str = hivex_value_value (h, values[i], &t, &len); - if (str == NULL) { - ret = skip_bad ? 0 : -1; - goto error; + case hive_t_resource_list: + case hive_t_full_resource_description: + case hive_t_resource_requirements_list: + default: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 
0 : -1; + goto error; + } + if (vtor->value_other && + vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; } - if (vtor->value_other && - vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1) - goto error; - free (str); str = NULL; - break; } free (key); key = NULL; diff --git a/hivex/hivex.h b/hivex/hivex.h index b0c1c3b..56718b4 100644 --- a/hivex/hivex.h +++ b/hivex/hivex.h @@ -102,6 +102,7 @@ struct hivex_visitor { int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); }; #define HIVEX_VISIT_SKIP_BAD 1 diff --git a/hivex/hivex.pod b/hivex/hivex.pod index 0de4d54..5a58144 100644 --- a/hivex/hivex.pod +++ b/hivex/hivex.pod @@ -288,6 +288,11 @@ all, set the function pointer to NULL. hive_type t, size_t len, const char *key, const char *value); int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, hive_type t, size_t len, const char *key, const char *value); + /* If value_any callback is not NULL, then the other value_* + * callbacks are not used, and value_any is called on all values. + */ + int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, + hive_type t, size_t len, const char *key, const char *value); }; =over 4 -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:18 UTC
[Libguestfs] [PATCH 7/13] hivex: Modify children/values functions to return intermediate blocks.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 6440385c8d563847669515dd5c80d23136cc2b32 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 12:22:10 +0000 Subject: [PATCH 07/13] hivex: Modify children/values functions to return intermediate blocks. Modify the functions that return child subnodes and values so they can also be used to return a list of the intermediate blocks. This is so we can delete those intermediate blocks (in a later commit). We also introduce an offset_list structure which is used for collecting lists of offsets, ie. lists of nodes, values or blocks. Note that this commit should not change the semantics of the code. --- hivex/hivex.c | 238 ++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 168 insertions(+), 70 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 81d217b..dfac896 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -644,34 +644,97 @@ hivex_node_classname (hive_h *h, hive_node_h node) } #endif -hive_node_h * -hivex_node_children (hive_h *h, hive_node_h node) +/* Structure for returning 0-terminated lists of offsets (nodes, + * values, etc). + */ +struct offset_list { + size_t *offsets; + size_t len; + size_t alloc; +}; + +static void +init_offset_list (struct offset_list *list) +{ + list->len = 0; + list->alloc = 0; + list->offsets = NULL; +} + +#define INIT_OFFSET_LIST(name) \ + struct offset_list name; \ + init_offset_list (&name) + +/* Preallocates the offset_list, but doesn't make the contents longer. 
*/ +static int +grow_offset_list (struct offset_list *list, size_t alloc) +{ + assert (alloc >= list->len); + size_t *p = realloc (list->offsets, alloc * sizeof (size_t)); + if (p == NULL) + return -1; + list->offsets = p; + list->alloc = alloc; + return 0; +} + +static int +add_to_offset_list (struct offset_list *list, size_t offset) +{ + if (list->len >= list->alloc) { + if (grow_offset_list (list, list->alloc ? list->alloc * 2 : 4) == -1) + return -1; + } + list->offsets[list->len] = offset; + list->len++; + return 0; +} + +static void +free_offset_list (struct offset_list *list) +{ + free (list->offsets); +} + +static size_t * +return_offset_list (struct offset_list *list) +{ + if (add_to_offset_list (list, 0) == -1) + return NULL; + return list->offsets; /* caller frees */ +} + +/* Iterate over children, returning child nodes and intermediate blocks. */ +static int +get_children (hive_h *h, hive_node_h node, + hive_node_h **children_ret, size_t **blocks_ret) { if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { errno = EINVAL; - return NULL; + return -1; } struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys); + INIT_OFFSET_LIST (children); + INIT_OFFSET_LIST (blocks); + /* Deal with the common "no subkeys" case quickly. */ - hive_node_h *ret; - if (nr_subkeys_in_nk == 0) { - ret = malloc (sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - ret[0] = 0; - return ret; - } + if (nr_subkeys_in_nk == 0) + goto ok; /* Arbitrarily limit the number of subkeys we will ever deal with. */ if (nr_subkeys_in_nk > 1000000) { errno = ERANGE; - return NULL; + goto error; } + /* Preallocate space for the children. */ + if (grow_offset_list (&children, nr_subkeys_in_nk) == -1) + goto error; + /* The subkey_lf field can point either to an lf-record, which is * the common case, or if there are lots of subkeys, to an * ri-record. 
@@ -683,9 +746,12 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because subkey_lf is not a valid block (%zu)\n", subkey_lf); errno = EFAULT; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, subkey_lf) == -1) + goto error; + struct ntreg_hbin_block *block (struct ntreg_hbin_block *) (h->addr + subkey_lf); @@ -706,7 +772,7 @@ hivex_node_children (hive_h *h, hive_node_h node) if (nr_subkeys_in_nk != nr_subkeys_in_lf) { errno = ENOTSUP; - return NULL; + goto error; } size_t len = block_len (h, subkey_lf, NULL); @@ -715,16 +781,9 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because too many subkeys (%zu, %zu)\n", nr_subkeys_in_lf, len); errno = EFAULT; - return NULL; + goto error; } - /* Allocate space for the returned values. Note that - * nr_subkeys_in_lf is limited to a 16 bit value. - */ - ret = malloc ((1 + nr_subkeys_in_lf) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - size_t i; for (i = 0; i < nr_subkeys_in_lf; ++i) { hive_node_h subkey = lf->keys[i].offset; @@ -734,13 +793,12 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because subkey is not a valid block (0x%zx)\n", subkey); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[i] = subkey; + if (add_to_offset_list (&children, subkey) == -1) + goto error; } - ret[i] = 0; - return ret; + goto ok; } /* Points to ri-record? 
*/ else if (block->id[0] == 'r' && block->id[1] == 'i') { @@ -758,13 +816,16 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n", offset); errno = EFAULT; - return NULL; + goto error; } if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { errno = ENOTSUP; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, offset) == -1) + goto error; + struct ntreg_lf_record *lf (struct ntreg_lf_record *) (h->addr + offset); @@ -777,17 +838,12 @@ hivex_node_children (hive_h *h, hive_node_h node) if (nr_subkeys_in_nk != count) { errno = ENOTSUP; - return NULL; + goto error; } /* Copy list of children. Note nr_subkeys_in_nk is limited to * something reasonable above. */ - ret = malloc ((1 + nr_subkeys_in_nk) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - - count = 0; for (i = 0; i < nr_offsets; ++i) { hive_node_h offset = ri->offset[i]; offset += 0x1000; @@ -796,11 +852,11 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because ri-offset is not a valid block (0x%zx)\n", offset); errno = EFAULT; - return NULL; + goto error; } if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { errno = ENOTSUP; - return NULL; + goto error; } struct ntreg_lf_record *lf @@ -815,20 +871,40 @@ hivex_node_children (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_children: returning EFAULT because indirect subkey is not a valid block (0x%zx)\n", subkey); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[count++] = subkey; + if (add_to_offset_list (&children, subkey) == -1) + goto error; } } - ret[count] = 0; - - return ret; + goto ok; } - else { - errno = ENOTSUP; + /* else not supported, set errno and fall through */ + errno = ENOTSUP; + error: + free_offset_list (&children); + free_offset_list (&blocks); + return -1; + + ok: + *children_ret = 
return_offset_list (&children); + *blocks_ret = return_offset_list (&blocks); + if (!*children_ret || !*blocks_ret) + goto error; + return 0; +} + +hive_node_h * +hivex_node_children (hive_h *h, hive_node_h node) +{ + hive_node_h *children; + size_t *blocks; + + if (get_children (h, node, &children, &blocks) == -1) return NULL; - } + + free (blocks); + return children; } /* Very inefficient, but at least having a separate API call @@ -883,12 +959,13 @@ hivex_node_parent (hive_h *h, hive_node_h node) return ret; } -hive_value_h * -hivex_node_values (hive_h *h, hive_node_h node) +static int +get_values (hive_h *h, hive_node_h node, + hive_value_h **values_ret, size_t **blocks_ret) { if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { errno = EINVAL; - return 0; + return -1; } struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); @@ -898,22 +975,23 @@ hivex_node_values (hive_h *h, hive_node_h node) if (h->msglvl >= 2) fprintf (stderr, "hivex_node_values: nr_values = %zu\n", nr_values); + INIT_OFFSET_LIST (values); + INIT_OFFSET_LIST (blocks); + /* Deal with the common "no values" case quickly. */ - hive_node_h *ret; - if (nr_values == 0) { - ret = malloc (sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - ret[0] = 0; - return ret; - } + if (nr_values == 0) + goto ok; /* Arbitrarily limit the number of values we will ever deal with. */ if (nr_values > 100000) { errno = ERANGE; - return NULL; + goto error; } + /* Preallocate space for the values. */ + if (grow_offset_list (&values, nr_values) == -1) + goto error; + /* Get the value list and check it looks reasonable. 
*/ size_t vlist_offset = le32toh (nk->vallist); vlist_offset += 0x1000; @@ -922,9 +1000,12 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value list is not a valid block (0x%zx)\n", vlist_offset); errno = EFAULT; - return NULL; + goto error; } + if (add_to_offset_list (&blocks, vlist_offset) == -1) + goto error; + struct ntreg_value_list *vlist (struct ntreg_value_list *) (h->addr + vlist_offset); @@ -934,14 +1015,9 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value list is too long (%zu, %zu)\n", nr_values, len); errno = EFAULT; - return NULL; + goto error; } - /* Allocate return array and copy values in. */ - ret = malloc ((1 + nr_values) * sizeof (hive_node_h)); - if (ret == NULL) - return NULL; - size_t i; for (i = 0; i < nr_values; ++i) { hive_node_h value = vlist->offset[i]; @@ -951,14 +1027,36 @@ hivex_node_values (hive_h *h, hive_node_h node) fprintf (stderr, "hivex_node_values: returning EFAULT because value is not a valid block (0x%zx)\n", value); errno = EFAULT; - free (ret); - return NULL; + goto error; } - ret[i] = value; + if (add_to_offset_list (&values, value) == -1) + goto error; } - ret[i] = 0; - return ret; + ok: + *values_ret = return_offset_list (&values); + *blocks_ret = return_offset_list (&blocks); + if (!*values_ret || !*blocks_ret) + goto error; + return 0; + + error: + free_offset_list (&values); + free_offset_list (&blocks); + return -1; +} + +hive_value_h * +hivex_node_values (hive_h *h, hive_node_h node) +{ + hive_value_h *values; + size_t *blocks; + + if (get_values (h, node, &values, &blocks) == -1) + return NULL; + + free (blocks); + return values; } /* Very inefficient, but at least having a separate API call -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:19 UTC
[Libguestfs] [PATCH 8/13] hivex: Clarify some more fields.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From 5144c91bc57d26602accd8fc0419a5d00d8a5c58 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 15:20:36 +0000 Subject: [PATCH 08/13] hivex: Clarify some more fields. Taken from sentinelchicken.com documentation. --- hivex/hivex.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index dfac896..1f5c08b 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -203,7 +203,8 @@ struct ntreg_nk_record { int32_t seg_len; /* length (always -ve because used) */ char id[2]; /* "nk" */ uint16_t flags; - char timestamp[12]; + char timestamp[8]; + char unknown0[4]; uint32_t parent; /* offset of owner/parent */ uint32_t nr_subkeys; /* number of subkeys */ uint32_t unknown1; @@ -226,7 +227,7 @@ struct ntreg_lf_record { uint16_t nr_keys; /* number of keys in this record */ struct { uint32_t offset; /* offset of nk-record for this subkey */ - char name[4]; /* first 4 characters of subkey name */ + char hash[4]; /* hash of subkey name */ } keys[1]; } __attribute__((__packed__)); -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:19 UTC
[Libguestfs] [PATCH 9/13] hivex: Add missing le32toh conversion around field access.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-top is 'top' for virtual machines. Tiny program with many powerful monitoring features, net stats, disk stats, logging, etc. http://et.redhat.com/~rjones/virt-top -------------- next part -------------->From 596de09265076de8fe4909f662e898159dfc3736 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Tue, 19 Jan 2010 15:21:06 +0000 Subject: [PATCH 09/13] hivex: Add missing le32toh conversion around field access. This was missing. It only worked because we test on a little endian platform. --- hivex/hivex.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 1f5c08b..d8e599b 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -787,7 +787,7 @@ get_children (hive_h *h, hive_node_h node, size_t i; for (i = 0; i < nr_subkeys_in_lf; ++i) { - hive_node_h subkey = lf->keys[i].offset; + hive_node_h subkey = le32toh (lf->keys[i].offset); subkey += 0x1000; if (!IS_VALID_BLOCK (h, subkey)) { if (h->msglvl >= 2) @@ -865,7 +865,7 @@ get_children (hive_h *h, hive_node_h node, size_t j; for (j = 0; j < le16toh (lf->nr_keys); ++j) { - hive_node_h subkey = lf->keys[j].offset; + hive_node_h subkey = le32toh (lf->keys[j].offset); subkey += 0x1000; if (!IS_VALID_BLOCK (h, subkey)) { if (h->msglvl >= 2) @@ -1198,7 +1198,7 @@ hivex_value_value (hive_h *h, hive_value_h value, return ret; } - size_t data_offset = vk->data_offset; + size_t data_offset = le32toh (vk->data_offset); data_offset += 0x1000; if (!IS_VALID_BLOCK (h, data_offset)) { if (h->msglvl >= 2) -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:20 UTC
[Libguestfs] [PATCH 10/13] hivex: hive type in vk-record is an unsigned 32 bit int
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 350a37366be2a62d41c9a3f373992818940eb2a4 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Thu, 21 Jan 2010 16:19:26 +0000 Subject: [PATCH 10/13] hivex: hive type in vk-record is an unsigned 32 bit int --- hivex/hivex.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index d8e599b..4fe8175 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -255,7 +255,7 @@ struct ntreg_vk_record { */ uint32_t data_len; uint32_t data_offset; /* pointer to the data (or data if inline) */ - hive_type data_type; /* type of the data */ + uint32_t data_type; /* type of the data */ uint16_t flags; /* bit 0 set => key name ASCII, bit 0 clr => key name UTF-16. Only seen ASCII here in the wild. */ -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:20 UTC
[Libguestfs] [PATCH 11/13] hivex: display bad block offset in hex
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into Xen guests. http://et.redhat.com/~rjones/virt-p2v -------------- next part -------------->From 3a58f3145b6b966ab65fdbc57211d630d8b82005 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Thu, 21 Jan 2010 16:19:49 +0000 Subject: [PATCH 11/13] hivex: display bad block offset in hex --- hivex/hivex.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 4fe8175..1eadee3 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -459,7 +459,7 @@ hivex_open (const char *filename, int flags) int used; seg_len = block_len (h, blkoff, &used); if (seg_len <= 4 || (seg_len & 3) != 0) { - fprintf (stderr, "hivex: %s: block size %d at %zu, bad registry\n", + fprintf (stderr, "hivex: %s: block size %d at 0x%zx, bad registry\n", filename, le32toh (block->seg_len), blkoff); errno = ENOTSUP; goto error; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:21 UTC
[Libguestfs] [PATCH 12/13] hivex: Display incorrect block size as unsigned in an error message.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://et.redhat.com/~rjones/libguestfs/ See what it can do: http://et.redhat.com/~rjones/libguestfs/recipes.html -------------- next part -------------->From 95bad75b0d529c92ee77cb8e9c08be15af47341d Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Thu, 21 Jan 2010 17:07:21 +0000 Subject: [PATCH 12/13] hivex: Display incorrect block size as unsigned in an error message. --- hivex/hivex.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 1eadee3..4e67b55 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -459,7 +459,7 @@ hivex_open (const char *filename, int flags) int used; seg_len = block_len (h, blkoff, &used); if (seg_len <= 4 || (seg_len & 3) != 0) { - fprintf (stderr, "hivex: %s: block size %d at 0x%zx, bad registry\n", + fprintf (stderr, "hivex: %s: block size %" PRIu32 " at 0x%zx, bad registry\n", filename, le32toh (block->seg_len), blkoff); errno = ENOTSUP; goto error; -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:21 UTC
[Libguestfs] [PATCH 13/13] hivex: Fix calculation of block size for vk data blocks.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming blog: http://rwmj.wordpress.com Fedora now supports 80 OCaml packages (the OPEN alternative to F#) http://cocan.org/getting_started_with_ocaml_on_red_hat_and_fedora -------------- next part -------------->From 57838e1803df4ab50e11cd89c4f85f760d5c01dc Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Thu, 21 Jan 2010 17:07:42 +0000 Subject: [PATCH 13/13] hivex: Fix calculation of block size for vk data blocks. --- hivex/hivex.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hivex/hivex.c b/hivex/hivex.c index 4e67b55..43d5788 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -1211,7 +1211,7 @@ hivex_value_value (hive_h *h, hive_value_h value, /* Check that the declared size isn't larger than the block its in. */ size_t blen = block_len (h, data_offset, NULL); - if (len > blen) { + if (len > blen - 4 /* subtract 4 for block header */) { if (h->msglvl >= 2) fprintf (stderr, "hivex_value_value: returning EFAULT because data is longer than its block (data 0x%zx, data len %zu, block len %zu)\n", data_offset, len, blen); -- 1.6.5.2
Richard W.M. Jones
2010-Jan-28 10:23 UTC
[Libguestfs] [PATCH 14/13 NOT FOR REVIEW] hivex: Implement writing to hives.
This final patch actually implements writing to hives. It is not complete yet because although it works as far as our tools are concerned, Windows ignores any new values added to a node, for reasons which we don't yet understand. Therefore I am continuing to reverse- engineer the hive format itself so that we fully understand all the fields. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones New in Fedora 11: Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 70 libraries supprt'd http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw -------------- next part -------------->From 04cfc3dd9aae969272e810f9a6a66b73d0cf93ba Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Mon, 18 Jan 2010 13:36:20 +0000 Subject: [PATCH] hivex: Implement writing to hives. --- .gitignore | 3 + hivex/Makefile.am | 35 +++- hivex/README | 2 +- hivex/example1.c | 46 ++++ hivex/example2.c | 86 +++++++ hivex/hivex.c | 543 ++++++++++++++++++++++++++++++++++++++++++- hivex/hivex.h | 14 ++ hivex/hivex.pod | 143 ++++++++++++ hivex/visualizer.ml | 531 ++++++++++++++++++++++++++++++++++++++++++ hivex/visualizer_NT_time.ml | 30 +++ hivex/visualizer_utils.ml | 124 ++++++++++ m4/.gitignore | 1 + po/POTFILES.in | 2 + 13 files changed, 1557 insertions(+), 3 deletions(-) create mode 100644 hivex/example1.c create mode 100644 hivex/example2.c create mode 100644 hivex/visualizer.ml create mode 100644 hivex/visualizer_NT_time.ml create mode 100644 hivex/visualizer_utils.ml diff --git a/.gitignore b/.gitignore index 829f807..d066611 100644 --- a/.gitignore +++ b/.gitignore @@ -82,8 +82,11 @@ haskell/Guestfs.hs *.hi hivex/*.1 hivex/*.3 +hivex/example1 +hivex/example2 hivex/hivexget hivex/hivexml +hivex/visualizer.opt html/guestfish.1.html html/guestfs.3.html html/guestmount.1.html diff --git a/hivex/Makefile.am b/hivex/Makefile.am index a2be7e3..c8a7cf6 100644 --- a/hivex/Makefile.am +++ 
b/hivex/Makefile.am @@ -15,7 +15,14 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -EXTRA_DIST = hivex.pod hivexml.pod hivexget.pod LICENSE +EXTRA_DIST = \ + hivex.pod \ + hivexml.pod \ + hivexget.pod \ + LICENSE \ + visualizer.ml \ + visualizer_utils.ml \ + visualizer_NT_time.ml lib_LTLIBRARIES = libhivex.la @@ -28,6 +35,7 @@ libhivex_la_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) libhivex_la_CPPFLAGS = -I$(top_srcdir)/gnulib/lib bin_PROGRAMS = hivexml hivexget +noinst_PROGRAMS = example1 example2 hivexml_SOURCES = \ hivexml.c @@ -44,6 +52,20 @@ hivexget_LDADD = libhivex.la ../gnulib/lib/libgnu.la hivexget_CFLAGS = \ $(WARN_CFLAGS) $(WERROR_CFLAGS) +example1_SOURCES = \ + example1.c + +example1_LDADD = libhivex.la ../gnulib/lib/libgnu.la +example1_CFLAGS = \ + $(WARN_CFLAGS) $(WERROR_CFLAGS) + +example2_SOURCES = \ + example2.c + +example2_LDADD = libhivex.la ../gnulib/lib/libgnu.la +example2_CFLAGS = \ + $(WARN_CFLAGS) $(WERROR_CFLAGS) + man_MANS = hivex.3 hivexml.1 hivexget.1 hivex.3: hivex.pod @@ -98,3 +120,14 @@ $(top_builddir)/html/hivexget.1.html: hivexget.pod --htmldir html \ --outfile html/hivexget.1.html \ hivex/hivexget.pod + +# OCaml Windows Registry visualizer. This was used while reverse +# engineering the hive format, and is not normally compiled. If you +# do with to compile it, you'll need ocaml-bitstring-devel and +# ocaml-extlib-devel. Also you'll need a collection of hive files +# from Windows machines to experiment with. + +visualizer.opt: visualizer_utils.ml visualizer_NT_time.ml visualizer.ml + ocamlfind ocamlopt \ + -package bitstring,bitstring.syntax,extlib \ + -syntax camlp4 -linkpkg $^ -o $@ diff --git a/hivex/README b/hivex/README index 583d351..0aebc8a 100644 --- a/hivex/README +++ b/hivex/README @@ -15,7 +15,7 @@ This library was derived from several sources: . 
NTREG registry reader/writer library by Petter Nordahl-Hagen (LGPL v2.1 licensed library and program) - . http://home.eunet.no/pnordahl/ntpasswd/WinReg.txt + . http://pogostick.net/~pnh/ntpasswd/WinReg.txt . dumphive (a BSD-licensed Pascal program by Markus Stephany) . http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf . editreg program from Samba - this program was removed in later diff --git a/hivex/example1.c b/hivex/example1.c new file mode 100644 index 0000000..18c847d --- /dev/null +++ b/hivex/example1.c @@ -0,0 +1,46 @@ +/* Example program which loads and saves a hive. + * This example may be freely copied and modified without restrictions. + * + * The intention of this example is just to check that we can do this + * without corrupting the hive (header etc). + * + * NB: The copy of the hive will not be absolutely identical. The + * sequence numbers in the header will change. If we implement the + * last modified field in the header, then that and the checksum will + * also change. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "hivex.h" + +int +main (int argc, char *argv[]) +{ + if (argc != 3) { + fprintf (stderr, "example1 hive.orig hive.new\n"); + exit (EXIT_FAILURE); + } + char *orig = argv[1]; + char *newf = argv[2]; + + hive_h *h = hivex_open (orig, HIVEX_OPEN_WRITE /*| HIVEX_OPEN_DEBUG*/); + if (h == NULL) { + error: + perror (orig); + exit (EXIT_FAILURE); + } + + if (hivex_commit (h, newf, 0) == -1) + goto error; + + if (hivex_close (h) == -1) + goto error; + + exit (EXIT_SUCCESS); +} diff --git a/hivex/example2.c b/hivex/example2.c new file mode 100644 index 0000000..5b1cb17 --- /dev/null +++ b/hivex/example2.c @@ -0,0 +1,86 @@ +/* Example program which modifies a hive. + * This example may be freely copied and modified without restrictions. 
+ * + * You need to supply the 'software' hive from a Windows distribution + * (usually in C:\windows\system32\config\software). This hive + * contains a node '\Classes\*'. This program removes existing (key, + * value) pairs at this node and replaces them with some example + * values. + * + * You can load the modified hive using another tool to see the + * changes. eg. Using Windows regedit, select HKLM and then in the + * File menu choose "Load Hive ...". Point to the update hive, and + * then give a key (eg. "test1"). The modified hive will be loaded + * under HKLM\test1 and the modified class can be inspected under + * HKLM\test1\Classes\*. After inspecting the changes, unload the + * hive using File -> Unload Hive. + * + * Don't replace the original Windows 'software' hive, else you'll + * break things :-) + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "hivex.h" + +int +main (int argc, char *argv[]) +{ + if (argc != 3) { + fprintf (stderr, "example1 software software.new\n"); + exit (EXIT_FAILURE); + } + char *orig = argv[1]; + char *newf = argv[2]; + + hive_h *h = hivex_open (orig, HIVEX_OPEN_WRITE | HIVEX_OPEN_DEBUG); + if (h == NULL) { + error: + perror (orig); + exit (EXIT_FAILURE); + } + + /* Navigate to the desired node. 
*/ + hive_node_h root = hivex_root (h); + if (!root) + goto error; + + hive_node_h node_cl = hivex_node_get_child (h, root, "Classes"); + if (!node_cl) { + fprintf (stderr, "%s: cannot find node \\Classes", orig); + exit (EXIT_FAILURE); + } + + hive_node_h node_star = hivex_node_get_child (h, node_cl, "*"); + if (!node_star) { + fprintf (stderr, "%s: cannot find node \\Classes\\*", orig); + exit (EXIT_FAILURE); + } + + const hive_set_value values[] = { + { "A", hive_t_string, 8, "a\0b\0c\0d\0" }, + { "B", hive_t_dword, 4, "\x78\x56\x34\x12" /* little endian 0x12345678 */ }, + { "C", hive_t_string, 32, "d\0c\0b\0a\0d\0c\0b\0a\0d\0c\0b\0a\0a\0b\0c\0d\0" }, + }; + const int nr_values = sizeof values / sizeof values[0]; + + printf ("setting %d new values in node \\Classes\\* ...\n", nr_values); + + if (hivex_node_set_values (h, node_star, nr_values, values, 0) == -1) + goto error; + + printf ("committing changes to new file %s ...\n", newf); + + if (hivex_commit (h, newf, 0) == -1) + goto error; + + if (hivex_close (h) == -1) + goto error; + + exit (EXIT_SUCCESS); +} diff --git a/hivex/hivex.c b/hivex/hivex.c index 43d5788..b0657e3 100644 --- a/hivex/hivex.c +++ b/hivex/hivex.c @@ -41,6 +41,7 @@ #endif #include "full-read.h" +#include "full-write.h" #ifndef O_CLOEXEC #define O_CLOEXEC 0 @@ -60,35 +61,65 @@ #ifndef be32toh #define be32toh(x) __bswap_32 (x) #endif +#ifndef htobe32 +#define htobe32(x) __bswap_32 (x) +#endif #ifndef be64toh #define be64toh(x) __bswap_64 (x) #endif +#ifndef htobe64 +#define htobe64(x) __bswap_64 (x) +#endif #ifndef le16toh #define le16toh(x) (x) #endif +#ifndef htole16 +#define htole16(x) (x) +#endif #ifndef le32toh #define le32toh(x) (x) #endif +#ifndef htole32 +#define htole32(x) (x) +#endif #ifndef le64toh #define le64toh(x) (x) #endif -#else +#ifndef htole64 +#define htole64(x) (x) +#endif +#else /* __BYTE_ORDER == __BIG_ENDIAN */ #ifndef be32toh #define be32toh(x) (x) #endif +#ifndef htobe32 +#define htobe32(x) (x) +#endif #ifndef 
be64toh #define be64toh(x) (x) #endif +#ifndef htobe64 +#define htobe64(x) (x) +#endif #ifndef le16toh #define le16toh(x) __bswap_16 (x) #endif +#ifndef htole16 +#define htole16(x) __bswap_16 (x) +#endif #ifndef le32toh #define le32toh(x) __bswap_32 (x) #endif +#ifndef htole32 +#define htole32(x) __bswap_32 (x) +#endif #ifndef le64toh #define le64toh(x) __bswap_64 (x) #endif +#ifndef htole64 +#define htole64(x) __bswap_64 (x) #endif +#endif /* __BYTE_ORDER == __BIG_ENDIAN */ #include "hivex.h" @@ -127,6 +158,10 @@ struct hive_h { /* Fields from the header, extracted from little-endianness hell. */ size_t rootoffs; /* Root key offset (always an nk-block). */ size_t endpages; /* Offset of end of pages. */ + + /* For writing. */ + size_t endblocks; /* Offset to next block allocation (0 + if not allocated anything yet). */ }; /* NB. All fields are little endian. */ @@ -552,6 +587,10 @@ hivex_close (hive_h *h) return r; } +/*---------------------------------------------------------------------- + * Reading. + */ + hive_node_h hivex_root (hive_h *h) { @@ -1431,6 +1470,10 @@ hivex_value_qword (hive_h *h, hive_value_h value) return ret; } +/*---------------------------------------------------------------------- + * Visiting. + */ + int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags) @@ -1674,3 +1717,501 @@ hivex__visit_node (hive_h *h, hive_node_h node, free_strings (strs); return ret; } + +/*---------------------------------------------------------------------- + * Writing. + */ + +/* Allocate an hbin (page), extending the malloc'd space if necessary, + * and updating the hive handle fields (but NOT the hive disk header + * -- the hive disk header is updated when we commit). This function + * also extends the bitmap if necessary. + * + * 'allocation_hint' is the size of the block allocation we would like + * to make. 
Normally registry blocks are very small (avg 50 bytes) + * and are contained in standard-sized pages (4KB), but the registry + * can support blocks which are larger than a standard page, in which + * case it creates a page of 8KB, 12KB etc. + * + * Returns: + * > 0 : offset of first usable byte of new page (after page header) + * 0 : error (errno set) + */ +static size_t +allocate_page (hive_h *h, size_t allocation_hint) +{ + /* In almost all cases this will be 1. */ + size_t nr_4k_pages + 1 + (allocation_hint + sizeof (struct ntreg_hbin_page) - 1) / 4096; + assert (nr_4k_pages >= 1); + + /* 'extend' is the number of bytes to extend the file by. Note that + * hives found in the wild often contain slack between 'endpages' + * and the actual end of the file, so we don't always need to make + * the file larger. + */ + ssize_t extend = h->endpages + nr_4k_pages * 4096 - h->size; + + if (h->msglvl >= 2) { + fprintf (stderr, "allocate_page: current endpages = 0x%zx, current size = 0x%zx\n", + h->endpages, h->size); + fprintf (stderr, "allocate_page: extending file by %zd bytes (<= 0 if no extension)\n", + extend); + } + + if (extend > 0) { + size_t oldsize = h->size; + size_t newsize = h->size + extend; + char *newaddr = realloc (h->addr, newsize); + if (newaddr == NULL) + return 0; + + size_t oldbitmapsize = 1 + oldsize / 32; + size_t newbitmapsize = 1 + newsize / 32; + char *newbitmap = realloc (h->addr, newbitmapsize); + if (newbitmap == NULL) { + free (newaddr); + return 0; + } + + h->addr = newaddr; + h->size = newsize; + + memset (h->addr + oldsize, 0, newsize - oldsize); + memset (h->bitmap + oldbitmapsize, 0, newbitmapsize - oldbitmapsize); + } + + size_t offset = h->endpages; + h->endpages += nr_4k_pages * 4096; + + if (h->msglvl >= 2) + fprintf (stderr, "allocate_page: new endpages = 0x%zx, new size = 0x%zx\n", + h->endpages, h->size); + + /* Write the hbin header. 
*/ + struct ntreg_hbin_page *page + (struct ntreg_hbin_page *) (h->addr + offset); + page->magic[0] = 'h'; + page->magic[1] = 'b'; + page->magic[2] = 'i'; + page->magic[3] = 'n'; + page->offset_first = htole32 (offset - 0x1000); + page->page_size = htole32 (nr_4k_pages * 4096); + memset (page->unknown, 0, sizeof (page->unknown)); + + if (h->msglvl >= 2) + fprintf (stderr, "allocate_page: new page at 0x%zx\n", offset); + + /* Offset of first usable byte after the header. */ + return offset + sizeof (struct ntreg_hbin_page); +} + +/* Allocate a single block, first allocating an hbin (page) at the end + * of the current file if necessary. NB. To keep the implementation + * simple and more likely to be correct, we do not reuse existing free + * blocks. + * + * seg_len is the size of the block (this INCLUDES the block header). + * The header of the block is initialized to -seg_len (negative to + * indicate used). id[2] is the block ID (type), eg. "nk" for nk- + * record. The block bitmap is updated to show this block as valid. + * The rest of the contents of the block will be zero. + * + * Returns: + * > 0 : offset of new block + * 0 : error (errno set) + */ +static size_t +allocate_block (hive_h *h, size_t seg_len, const char id[2]) +{ + if (!h->writable) { + errno = EROFS; + return 0; + } + + if (seg_len < 4) { + /* The caller probably forgot to include the header. Note that + * value lists have no ID field, so seg_len == 4 would be possible + * for them, albeit unusual. + */ + if (h->msglvl >= 2) + fprintf (stderr, "allocate_block: refusing too small allocation (%zu), returning ERANGE\n", + seg_len); + errno = ERANGE; + return 0; + } + + /* Refuse really large allocations. */ + if (seg_len > 1000000) { + if (h->msglvl >= 2) + fprintf (stderr, "allocate_block: refusing large allocation (%zu), returning ERANGE\n", + seg_len); + errno = ERANGE; + return 0; + } + + /* Round up allocation to multiple of 4 bytes. 
*/ + seg_len = (seg_len + 3) & ~3; + + /* Allocate a new page if necessary. */ + if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) { + size_t newendblocks = allocate_page (h, seg_len); + if (newendblocks == 0) + return 0; + h->endblocks = newendblocks; + } + + size_t offset = h->endblocks; + + if (h->msglvl >= 2) + fprintf (stderr, "allocate_block: new block at 0x%zx, size %zu\n", + offset, seg_len); + + struct ntreg_hbin_block *blockhdr + (struct ntreg_hbin_block *) (h->addr + offset); + + blockhdr->seg_len = htole32 (- (int32_t) seg_len); + if (id[0] && id[1] && seg_len >= 6) { + blockhdr->id[0] = id[0]; + blockhdr->id[1] = id[1]; + } + + h->endblocks += seg_len; + + /* If there is space after the last block in the last page, then we + * have to put a dummy free block header here to mark the rest of + * the page as free. + */ + ssize_t rem = h->endpages - h->endblocks; + if (rem > 0) { + if (h->msglvl >= 2) + fprintf (stderr, "allocate_block: marking remainder of page free starting at 0x%zx, size %zd\n", + h->endblocks, rem); + + assert (rem >= 4); + + blockhdr = (struct ntreg_hbin_block *) (h->addr + h->endblocks); + blockhdr->seg_len = htole32 ((int32_t) rem); + } + + return offset; +} + +/* 'offset' must point to a valid, used block. This function marks + * the block unused (by updating the seg_len field) and invalidates + * the bitmap. It does NOT do this recursively, so to avoid creating + * unreachable used blocks, callers may have to recurse over the hive + * structures. Also callers must ensure there are no references to + * this block from other parts of the hive. 
+ */ +static void +mark_block_unused (hive_h *h, size_t offset) +{ + assert (h->writable); + assert (IS_VALID_BLOCK (h, offset)); + + struct ntreg_hbin_block *blockhdr + (struct ntreg_hbin_block *) (h->addr + offset); + + size_t seg_len = block_len (h, offset, NULL); + blockhdr->seg_len = htole32 (seg_len); + + BITMAP_CLR (h->bitmap, offset); +} + +/* Delete all existing values at this node. */ +static int +delete_values (hive_h *h, hive_node_h node) +{ + assert (h->writable); + + hive_value_h *values; + size_t *blocks; + if (get_values (h, node, &values, &blocks) == -1) + return -1; + + size_t i; + for (i = 0; blocks[i] != 0; ++i) + mark_block_unused (h, blocks[i]); + + free (blocks); + + for (i = 0; values[i] != 0; ++i) { + struct ntreg_vk_record *vk + (struct ntreg_vk_record *) (h->addr + values[i]); + + size_t len; + len = le32toh (vk->data_len); + if (len == 0x80000000) /* special case */ + len = 4; + len &= 0x7fffffff; + + if (len > 4) { /* non-inline, so remove data block */ + size_t data_offset = le32toh (vk->data_offset); + data_offset += 0x1000; + mark_block_unused (h, data_offset); + } + + /* remove vk record */ + mark_block_unused (h, values[i]); + } + + free (values); + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + nk->nr_values = htole32 (0); + nk->vallist = htole32 (0xffffffff); + + return 0; +} + +int +hivex_commit (hive_h *h, const char *filename, int flags) +{ + if (flags != 0) { + errno = EINVAL; + return -1; + } + + if (!h->writable) { + errno = EROFS; + return -1; + } + + filename = filename ? : h->filename; + int fd = open (filename, O_WRONLY); + if (fd == -1) + return -1; + + /* Update the header fields. */ + uint32_t sequence = le32toh (h->hdr->sequence1); + sequence++; + h->hdr->sequence1 = htole32 (sequence); + h->hdr->sequence2 = htole32 (sequence); + /* XXX Ought to update h->hdr->last_modified. */ + h->hdr->blocks = htole32 (h->endpages - 0x1000); + + /* Recompute header checksum. 
*/ + uint32_t sum = header_checksum (h); + h->hdr->csum = htole32 (sum); + + if (h->msglvl >= 2) + fprintf (stderr, "hivex_commit: new header checksum: 0x%x\n", sum); + + if (full_write (fd, h->addr, h->size) != h->size) { + int err = errno; + close (fd); + errno = err; + return -1; + } + + if (close (fd) == -1) + return -1; + + return 0; +} + +#if 0 +hive_node_h +hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) +{ + if (!h->writable) { + errno = EROFS; + return 0; + } + + if (!IS_VALID_BLOCK (h, parent) || !BLOCK_ID_EQ (h, parent, "nk")) { + errno = EINVAL; + return -1; + } + + if (name == NULL) { + errno = EINVAL; + return -1; + } + + + + + + +} +#endif + +/* Callback from hivex_node_delete_child which is called to delete a + * node AFTER its subnodes have been visited. The subnodes have been + * deleted but we still have to delete any lf/lh/li/ri records and the + * value list block and values, followed by deleting the node itself. + */ +static int +delete_node (hive_h *h, void *opaque, hive_node_h node, const char *name) +{ + hive_node_h *unused; + size_t *blocks; + if (get_children (h, node, &unused, &blocks) == -1) + return -1; + free (unused); + + /* We don't care what's in these intermediate blocks, so we can just + * delete them unconditionally. + */ + size_t i; + for (i = 0; blocks[i] != 0; ++i) + mark_block_unused (h, blocks[i]); + + free (blocks); + + /* Delete the values in the node. */ + if (delete_values (h, node) == -1) + return -1; + + /* XXX + mark_block_unused (node->sk); + mark_block_unused (node->classname); + */ + + /* Delete the node itself. 
*/ + mark_block_unused (h, node); + + return 0; +} + +int +hivex_node_delete_child (hive_h *h, hive_node_h node) +{ + if (!h->writable) { + errno = EROFS; + return -1; + } + + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return -1; + } + + if (node == hivex_root (h)) { + if (h->msglvl >= 2) + fprintf (stderr, "hivex_node_delete_child: cannot delete root node\n"); + errno = EINVAL; + return -1; + } + + hive_node_h parent = hivex_node_parent (h, node); + if (parent == 0) + return -1; + + /* Delete node and all its children and values recursively. */ + static const struct hivex_visitor visitor = { .node_end = delete_node }; + if (hivex_visit_node (h, node, &visitor, sizeof visitor, NULL, 0) == -1) + return -1; + + /* Delete the link from parent to child. We need to find the lf/lh + * record which contains the offset and remove the offset from that + * record, then decrement the element count in that record, and + * decrement the overall number of subkeys stored in the parent + * node. 
+ */ + hive_node_h *unused; + size_t *blocks; + if (get_children (h, parent, &unused, &blocks) == -1) + return -1; + + size_t i, j; + for (i = 0; blocks[i] != 0; ++i) { + struct ntreg_hbin_block *block + (struct ntreg_hbin_block *) (h->addr + blocks[i]); + + if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) { + struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block; + + size_t nr_subkeys_in_lf = le16toh (lf->nr_keys); + + for (j = 0; j < nr_subkeys_in_lf; ++j) + if (le32toh (lf->keys[j].offset) + 0x1000 == node) { + for (; j < nr_subkeys_in_lf - 1; ++j) + memcpy (&lf->keys[j], &lf->keys[j+1], sizeof (lf->keys[j])); + lf->nr_keys = htole16 (nr_subkeys_in_lf - 1); + goto found; + } + } + } + if (h->msglvl >= 2) + fprintf (stderr, "hivex_node_delete_child: could not find parent to child link\n"); + errno = ENOTSUP; + return -1; + + found:; + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys); + nk->nr_subkeys = htole32 (nr_subkeys_in_nk - 1); + + return 0; +} + +int +hivex_node_set_values (hive_h *h, hive_node_h node, + size_t nr_values, const hive_set_value *values, + int flags) +{ + if (!h->writable) { + errno = EROFS; + return -1; + } + + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return -1; + } + + /* Delete all existing values. */ + if (delete_values (h, node) == -1) + return -1; + + if (nr_values == 0) + return 0; + + /* Allocate value list node. Value lists have no id field. 
*/ + static const char nul_id[2] = { 0, 0 }; + size_t seg_len + sizeof (struct ntreg_value_list) + (nr_values - 1) * sizeof (uint32_t); + size_t vallist_offs = allocate_block (h, seg_len, nul_id); + if (vallist_offs == 0) + return -1; + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + nk->nr_values = htole32 (nr_values); + nk->vallist = htole32 (vallist_offs - 0x1000); + + struct ntreg_value_list *vallist + (struct ntreg_value_list *) (h->addr + vallist_offs); + + size_t i; + for (i = 0; i < nr_values; ++i) { + /* Allocate vk record to store this (key, value) pair. */ + static const char vk_id[2] = { 'v', 'k' }; + seg_len = sizeof (struct ntreg_vk_record) + strlen (values[i].key); + size_t vk_offs = allocate_block (h, seg_len, vk_id); + if (vk_offs == 0) + return -1; + + vallist->offset[i] = htole32 (vk_offs - 0x1000); + + struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + vk_offs); + vk->name_len = htole16 (strlen (values[i].key)); + strcpy (vk->name, values[i].key); + vk->data_type = htole32 (values[i].t); + vk->data_len = htole16 (values[i].len); + + if (values[i].len <= 4) /* Store data inline. 
*/ + memcpy (&vk->data_offset, values[i].value, values[i].len); + else { + size_t offs = allocate_block (h, values[i].len + 4, nul_id); + if (offs == 0) + return -1; + memcpy (h->addr + offs + 4, values[i].value, values[i].len); + vk->data_offset = htole32 (offs - 0x1000); + } + } + + return 0; +} diff --git a/hivex/hivex.h b/hivex/hivex.h index 56718b4..cca1971 100644 --- a/hivex/hivex.h +++ b/hivex/hivex.h @@ -110,6 +110,20 @@ struct hivex_visitor { extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); +extern int hivex_commit (hive_h *h, const char *filename, int flags); +extern hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); +extern int hivex_node_delete_child (hive_h *h, hive_node_h node); + +struct hive_set_value { + const char *key; + hive_type t; + size_t len; + const char *value; +}; +typedef struct hive_set_value hive_set_value; + +extern int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); + #ifdef __cplusplus } #endif diff --git a/hivex/hivex.pod b/hivex/hivex.pod index 5a58144..f8386e0 100644 --- a/hivex/hivex.pod +++ b/hivex/hivex.pod @@ -326,6 +326,145 @@ starts at C<node>. =back +=head2 WRITING TO HIVE FILES + +The hivex library supports making limited modifications to hive files. +We have tried to implement this very conservatively in order to reduce +the chance of corrupting your registry. However you should be careful +and take back-ups, since Microsoft has never documented the hive +format, and so it is possible there are nuances in the +reverse-engineered format that we do not understand. + +To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE> +flag to C<hivex_open>, otherwise any write operation will return with +errno C<EROFS>. 
+ +The write operations shown below do not modify the on-disk file +immediately. You must call C<hivex_commit> in order to write the +changes to disk. If you call C<hivex_close> without committing then +any writes are discarded. + +Hive files internally consist of a "memory dump" of binary blocks +(like the C heap), and some of these blocks can be unused. The hivex +library never reuses these unused blocks. Instead, to ensure +robustness in the face of the partially understood on-disk format, +hivex only allocates new blocks after the end of the file, and makes +minimal modifications to existing structures in the file to point to +these new blocks. This makes hivex slightly less disk-efficient than +it could be, but disk is cheap, and registry modifications tend to be +very small. + +When deleting nodes, it is possible that this library may leave +unreachable live blocks in the hive. This is because certain parts of +the hive disk format such as security (sk) records and big data (db) +records and classname fields are not well understood (and not +documented at all) and we play it safe by not attempting to modify +them. Apart from wasting a little bit of disk space, it is not +thought that unreachable blocks are a problem. + +=over 4 + +=item int hivex_commit (hive_h *h, const char *filename, int flags); + +Commit (write) any changes which have been made. + +C<filename> is the new file to write. If C<filename == NULL> then we +overwrite the original file (ie. the file name that was passed to +C<hivex_open>). C<flags> is not used, always pass 0. + +Returns 0 on success. On error this returns -1 and sets errno. + +Note this does not close the hive handle. You can perform further +operations on the hive after committing, including making more +modifications. If you no longer wish to use the hive, call +C<hivex_close> after this. 
+ +=item hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); + +Add a new child node named C<name> to the existing node C<parent>. +The new child initially has no subnodes and contains no keys or +values. The parent must not have an existing child called C<name>, so +if you want to overwrite an existing child, call +C<hivex_node_delete_child> first. + +Returns the node handle. On error this returns 0 and sets errno. + +=item int hivex_node_delete_child (hive_h *h, hive_node_h node); + +Delete the node C<node>. All values at the node and all subnodes are +deleted (recursively). The C<node> handle and the handles of all +subnodes become invalid. You cannot delete the root node. + +Returns 0 on success. On error this returns -1 and sets errno. + +=item hive_set_value + +The typedef C<hive_set_value> is used in conjunction with the +C<hivex_node_set_values> call described below. + + struct hive_set_value { + const char *key; /* key - a UTF-8 encoded ASCIIZ string */ + hive_type t; /* type of value field */ + size_t len; /* length of value field in bytes */ + const char *value; /* value field */ + }; + typedef struct hive_set_value hive_set_value; + +To set the default value for a node, you have to pass C<key = "">. + +Note that the C<value> field is just treated as a list of bytes, and +is stored directly in the hive. The caller has to ensure correct +encoding and endianness, for example converting dwords to little +endian. + +The correct type and encoding for values depends on the node and key +in the registry, the version of Windows, and sometimes even changes +between versions of Windows for the same key. We don't document it +here. Often it's not documented at all. + +=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); + +This call can be used to set all the (key, value) pairs stored in C<node>. + +C<node> is the node to modify. 
C<values> is an array of (key, value) +pairs. There should be C<nr_values> elements in this array. C<flags> +is not used, always pass 0. + +Any existing values stored at the node are discarded, and their +C<hive_value_h> handles become invalid. Thus you can remove all +values stored at C<node> by passing C<nr_values = 0>. + +Returns 0 on success. On error this returns -1 and sets errno. + +Note that this library does not offer a way to modify just a single +key at a node. We don't implement a way to do this efficiently. + +=back + +=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED + +=over 4 + +=item * + +Changing the root node. + +=item * + +Creating a new hive file from scratch. This is impossible at present +because not all fields in the header are understood. + +=item * + +Modifying or deleting single values at a node. + +=item * + +Modifying security key (sk) records or classnames. These are not +well understood. + +=back + =head1 THE STRUCTURE OF THE WINDOWS REGISTRY Note: To understand the relationship between hives and the common @@ -452,6 +591,10 @@ Registry contains cycles. Field in the registry out of range. +=item EROFS + +Tried to write to a registry which is not opened for writing. + =back =head1 ENVIRONMENT VARIABLES diff --git a/hivex/visualizer.ml b/hivex/visualizer.ml new file mode 100644 index 0000000..acfce5b --- /dev/null +++ b/hivex/visualizer.ml @@ -0,0 +1,531 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + * + * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf + * http://pogostick.net/~pnh/ntpasswd/WinReg.txt + *) + +open Bitstring +open ExtString +open Printf +open Visualizer_utils +open Visualizer_NT_time + +let filename + if Array.length Sys.argv = 2 then Sys.argv.(1) + else + failwithf "error: missing filename\nusage: %s hivefile\n" + Sys.executable_name + +(* Load the file. *) +let bits = bitstring_of_file filename + +(* Split into header + data at the 4KB boundary. *) +let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits + +(* Define a persistent pattern which matches the header fields. By + * using persistent patterns, we can reuse them later in the + * program. + *) +let bitmatch header_fields + { "regf" : 4*8 : string; + seq1 : 4*8 : littleendian; + seq2 : 4*8 : littleendian; + last_modified : 64 + : littleendian, bind (nt_to_time_t last_modified); + major : 4*8 : littleendian; + minor : 4*8 : littleendian; + unknown1 : 4*8 : littleendian; + unknown2 : 4*8 : littleendian; + root_key : 4*8 + : littleendian, bind (get_offset root_key); + end_pages : 4*8 + : littleendian, bind (get_offset end_pages); + unknown3 : 4*8 : littleendian; + filename : 64*8 : string; + (* sentinelchicken documentation has some fields here which + * plainly don't exist in any hives I've seen. Treat it as a big + * block of unknown. 
+ *) + unknown4 : 396*8 : bitstring; + csum : 4*8 + : littleendian, save_offset_to (crc_offset), + check (assert (crc_offset = 0x1fc * 8); true); + unknown5 : (0x1000-0x200)*8 : bitstring } + +let fprintf_header chan bits + bitmatch bits with + | { :header_fields } -> + fprintf chan + "HD %6ld %6ld %s %ld.%ld u%08lx u%08lx %s %s u%08lx %s %s %08lx %s\n" + seq1 seq2 (print_time last_modified) major minor + unknown1 unknown2 + (print_offset root_key) (print_offset end_pages) + unknown3 (print_utf16 filename) + (print_bitstring unknown4) csum (print_bitstring unknown5) + +(* Parse the header and check it. *) +let root_key, end_pages + bitmatch header with + | { :header_fields } -> + if major <> 1 then + eprintf "!HD major"; + + + root_key, end_pages + | {_} -> + failwithf "%s: this doesn't look like a registry hive file\n" filename + +(* Define persistent patterns to match page and block fields. *) +let bitmatch page_fields + { "hbin" : 4*8 : string; + page_offset : 4*8 + : littleendian, bind (get_offset page_offset); + page_size : 4*8 + : littleendian, check (Int32.rem page_size 4096_l = 0_l), + bind (Int32.to_int page_size); + unknown : 20*8 : bitstring; + blocks : (page_size - 32) * 8 : bitstring; + rest : -1 : bitstring } + +let fprintf_page chan bits + bitmatch bits with + | { :page_fields } -> + ignore (blocks, rest); + fprintf chan "HB %s %08x %s\n" + (print_offset page_offset) + page_size (print_bitstring unknown) + +let bitmatch block_fields + { seg_len : 4*8 + : littleendian, bind (Int32.to_int seg_len); + block_data : (abs seg_len - 4) * 8 : bitstring; + rest : -1 : bitstring } + +(* Iterate over the pages and blocks. In the process we will examine + * each page (hbin) header. Also we will build block_list which is a + * list of (block offset, length, used flag, data). 
+ *) +let block_list = ref [] +let () + let rec loop_over_pages data data_offset + if data_offset >= end_pages then () + else ( + bitmatch data with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () + + | { :page_fields } -> + ignore (unknown); + + assert (page_offset = data_offset); + + (* Loop over the blocks in this page. *) + loop_over_blocks blocks (data_offset + 32); + + (* Loop over rest of the pages. *) + loop_over_pages rest (data_offset + page_size) + + | {_} -> + failwithf "%s: invalid hbin at offset %s\n" + filename (print_offset data_offset) + ) + and loop_over_blocks blocks block_offset + bitmatch blocks with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> () + + | { :block_fields } -> + let used, seg_len + if seg_len < 0 then true, -seg_len else false, seg_len in + + let block = block_offset, (seg_len, used, block_data) in + block_list := block :: !block_list; + + (* Loop over the rest of the blocks in this page. *) + loop_over_blocks rest (block_offset + seg_len) + + | {_} -> + failwithf "%s: invalid block near offset %s\n" + filename (print_offset block_offset) + in + loop_over_pages data 0 + +(* Turn the block_list into a map so we can quickly look up a block + * from its offset. + *) +let block_list = !block_list +let block_map + List.fold_left ( + fun map (block_offset, block) -> IntMap.add block_offset block map + ) IntMap.empty block_list +let lookup fn offset + try + let (_, used, _) as block = IntMap.find offset block_map in + if not used then + failwithf "%s: %s: lookup: free block %s referenced from hive tree" + filename fn (print_offset offset); + block + with Not_found -> + failwithf "%s: %s: lookup: unknown block %s referenced from hive tree" + filename fn (print_offset offset) + +(* Use this to mark blocks that we've visited. If the hive contains + * no unreferenced blocks, then by the end this should just contain + * free blocks. 
+ *) +let mark_visited, is_not_visited, unvisited_blocks + let v = ref block_map in + let mark_visited offset = v := IntMap.remove offset !v + and is_not_visited offset = IntMap.mem offset !v + and unvisited_blocks () = !v in + mark_visited, is_not_visited, unvisited_blocks + +(* Define persistent patterns to match nk-records, vk-records and + * sk-records, which are the record types that we especially want to + * analyze later. Other blocks types (eg. value lists, lf-records) + * have no "spare space" so everything is known about them and we don't + * store these. + *) +let bitmatch nk_fields + { "nk" : 2*8 : string; + (* Flags stored in the file as a little endian word, hence the + * unusual ordering: + *) + unknownflag0080 : 1; + predefinedhandle : 1; keynameascii : 1; symlinkkey : 1; + cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1; + unknownflag8000 : 1; unknownflag4000 : 1; + unknownflag2000 : 1; unknownflag1000 : 1; + unknownflag0800 : 1; unknownflag0400 : 1; + unknownflag0200 : 1; unknownflag0100 : 1; + timestamp : 64 : littleendian, bind (nt_to_time_t timestamp); + unknown1 : 4*8 : littleendian; + parent : 4*8 : littleendian, bind (get_offset parent); + nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys); + nr_subkeys_vol : 4*8; + subkeys : 4*8 : littleendian, bind (get_offset subkeys); + subkeys_vol : 4*8; + nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values); + vallist : 4*8 : littleendian, bind (get_offset vallist); + sk : 4*8 : littleendian, bind (get_offset sk); + classname : 4*8 : littleendian, bind (get_offset classname); + unknown2 : 4*8 : littleendian; + unknown3 : 4*8 : littleendian; + unknown4 : 4*8 : littleendian; + unknown5 : 4*8 : littleendian; + unknown6 : 4*8 : littleendian; + name_len : 2*8 : littleendian; + classname_len : 2*8 : littleendian; + name : name_len * 8 : string } + +let fprintf_nk chan nk + let (_, _, bits) = lookup "fprintf_nk" nk in + bitmatch bits with + | { :nk_fields } -> + fprintf 
chan + "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s u%08lx %08x %d %ld %s %08lx %d %s %s %s u%08lx u%08lx u%08lx u%08lx u%08lx %d %d %s\n" + (print_offset nk) + (if unknownflag8000 then "8" else ".") + (if unknownflag4000 then "4" else ".") + (if unknownflag2000 then "2" else ".") + (if unknownflag1000 then "1" else ".") + (if unknownflag0800 then "8" else ".") + (if unknownflag0400 then "4" else ".") + (if unknownflag0200 then "2" else ".") + (if unknownflag0100 then "1" else ".") + (if unknownflag0080 then "8" else ".") + (if predefinedhandle then "P" else ".") + (if keynameascii then "A" else ".") + (if symlinkkey then "S" else ".") + (if cannotbedeleted then "N" else ".") + (if isroot then "R" else ".") + (if ismountpoint then "M" else ".") + (if isvolatile then "V" else ".") + (print_time timestamp) + unknown1 parent nr_subkeys nr_subkeys_vol + (print_offset subkeys) subkeys_vol + nr_values (print_offset vallist) + (print_offset sk) (print_offset classname) + unknown2 unknown3 unknown4 unknown5 unknown6 + name_len classname_len name + +type data_t = Inline of bitstring | Offset of int +let bitmatch vk_fields + { "vk" : 2*8 : string; + name_len : 2*8 : littleendian; + (* No one documents the important fact that data_len can have the + * top bit set (randomly or is it meaningful?). The length can + * also be 0 (or 0x80000000) if the data type is NONE. + *) + data_len : 4*8 + : littleendian, bind ( + let data_len = Int32.logand data_len 0x7fff_ffff_l in + Int32.to_int data_len + ); + (* Inline data if len <= 4, offset otherwise. 
*) + data : 4*8 + : bitstring, bind ( + if data_len <= 4 then + Inline (takebits (data_len*8) data) + else ( + let offset + bitmatch data with { offset : 4*8 : littleendian } -> offset in + let offset = get_offset offset in + Offset offset + ) + ); + t : 4*8 : littleendian, bind (Int32.to_int t); + (* Flags, stored as a little-endian word: *) + unknown1 : 7; nameisascii : 1; unknown2 : 8; + unknown3 : 2*8 : littleendian; + name : name_len * 8 : string } + +let fprintf_vk chan vk + let (_, _, bits) = lookup "fprintf_vk" vk in + bitmatch bits with + | { :vk_fields } -> + let data + match data with + | Inline data -> data + | Offset offset -> + let (_, _, bits) = lookup "fprintf_vk (data)" offset in + bits in + fprintf chan "VK %s %s %d %s %s u%08x %s u%08x u%08x\n" + (print_offset vk) + name data_len (print_bitstring data) (print_vk_type t) + unknown1 (if nameisascii then "A" else "L") + unknown2 unknown3 + +let bitmatch sk_fields + { "sk" : 2*8 : string; + unknown1 : 2*8 : littleendian; + sk_prev : 4*8 : littleendian, bind (get_offset sk_prev); + sk_next : 4*8 : littleendian, bind (get_offset sk_next); + refcount : 4*8 : littleendian, bind (Int32.to_int refcount); + sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len); + sec_desc : sec_len * 8 : bitstring } + +let fprintf_sk chan sk + let (_, _, bits) = lookup "fprintf_sk" sk in + bitmatch bits with + | { :sk_fields } -> + ignore (sec_desc); + fprintf chan "SK %s u%04x %s %s %d %d\n" + (print_offset sk) unknown1 + (print_offset sk_prev) (print_offset sk_next) + refcount sec_len + (* print_bitstring sec_desc -- suppress this *) + +(* Store lists of records we encounter (lists of offsets). *) +let nk_records = ref [] +and vk_records = ref [] +and sk_records = ref [] + +(* Functions to visit each block, starting at the root. Each block + * that we visit is printed. 
+ *) +let rec visit_nk ?(nk_is_root = false) nk + let (_, _, bits) = lookup "visit_nk" nk in + mark_visited nk; + (bitmatch bits with + | { :nk_fields } -> + ignore (parent, timestamp); + + nk_records := nk :: !nk_records; + + (* Check the isroot flag is only set on the root node. *) + assert (isroot = nk_is_root); + + (* Visit the values first at this node. *) + if vallist <> -1 then + visit_vallist nr_values vallist; + + (* Visit the subkeys of this node. *) + if subkeys <> -1 then ( + let counted = visit_subkeys subkeys in + if counted <> nr_subkeys then + failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n" + filename nr_subkeys counted (print_offset subkeys) + ); + + (* Visit the sk-record and classname. *) + if sk <> -1 then + visit_sk sk; + if classname <> -1 then + visit_classname classname classname_len; + + | {_} -> + failwithf "%s: invalid nk block at offset %s\n" + filename (print_offset nk) + ) + +and visit_vallist nr_values vallist + let (_, _, bits) = lookup "visit_vallist" vallist in + mark_visited vallist; + visit_values_in_vallist nr_values vallist bits + +and visit_values_in_vallist nr_values vallist bits + if nr_values > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (nr_values = 0) + + | { value : 4*8 : littleendian, bind (get_offset value); + rest : -1 : bitstring } -> + visit_vk value; + visit_values_in_vallist (nr_values-1) vallist rest + + | {_} -> + failwithf "%s: invalid offset in value list at %s\n" + filename (print_offset vallist) + ) + +and visit_vk vk + let (_, _, bits) = lookup "visit_vk" vk in + mark_visited vk; + + (bitmatch bits with + | { :vk_fields } -> + ignore (t); + + vk_records := vk :: !vk_records; + (match data with + | Inline data -> () + | Offset offset -> + let _ = lookup "visit_vk (data)" offset in + mark_visited offset + ); + + | {_} -> + failwithf "%s: invalid vk block at offset %s\n" + filename (print_offset vk) + ) + +(* Visits 
subkeys, recursing through intermediate lf/lh/ri structures, + * and returns the number of subkeys actually seen. + *) +and visit_subkeys subkeys + let (_, _, bits) = lookup "visit_subkeys" subkeys in + mark_visited subkeys; + (bitmatch bits with + | { ("lf"|"lh") : 2*8 : string; + len : 2*8 : littleendian; (* number of subkeys of this node *) + rest : len*8*8 : bitstring } -> + (*printf "LF %s %d\n" (print_offset subkeys) len;*) + visit_subkeys_in_lf_list subkeys len rest + + | { "ri" : 2*8 : string; + len : 2*8 : littleendian; + rest : len*4*8 : bitstring } -> + (*printf "RI %s %d\n" (print_offset subkeys) len;*) + visit_subkeys_in_ri_list subkeys len rest + + (* In theory you can have an li-record here, but we've never + * seen one. + *) + + | { "nk" : 2*8 : string } -> + visit_nk subkeys; 1 + + | {_} -> + failwithf "%s: invalid subkey node found at %s\n" + filename (print_offset subkeys) + ) + +and visit_subkeys_in_lf_list subkeys_top len bits + if len > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (len = 0); + 0 + + | { offset : 4*8 : littleendian, bind (get_offset offset); + _ (* hash *) : 4*8 : bitstring; + rest : -1 : bitstring } -> + let c1 = visit_subkeys offset in + let c2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in + c1 + c2 + + | {_} -> + failwithf "%s: invalid subkey in lf/lh list at %s\n" + filename (print_offset subkeys_top) + ) else 0 + +and visit_subkeys_in_ri_list subkeys_top len bits + if len > 0 then ( + bitmatch bits with + | { rest : -1 : bitstring } when bitstring_length rest = 0 -> + assert (len = 0); + 0 + + | { offset : 4*8 : littleendian, bind (get_offset offset); + rest : -1 : bitstring } -> + let c1 = visit_subkeys offset in + let c2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in + c1 + c2 + + | {_} -> + failwithf "%s: invalid subkey in ri list at %s\n" + filename (print_offset subkeys_top) + ) else 0 + +and visit_sk sk + let (_, _, bits) = lookup "visit_sk" 
sk in + if is_not_visited sk then ( + mark_visited sk; + (bitmatch bits with + | { :sk_fields } -> + ignore (sk_prev, sk_next, refcount, sec_desc); + sk_records := sk :: !sk_records + + | {_} -> + failwithf "%s: invalid sk-record at %s\n" + filename (print_offset sk) + ) + ) + +and visit_classname classname classname_len + let (seg_len, _, bits) = lookup "visit_classname" classname in + mark_visited classname; + assert (seg_len >= classname_len) + (*printf "CL %s %s\n" (print_offset classname) (print_bitstring bits)*) + +let () + visit_nk ~nk_is_root:true root_key + +(* Now after visiting all the blocks, are there any used blocks which + * are unvisited? If there are any then that would indicate either (a) + * that the hive contains unreferenced blocks, or (b) that there are + * referenced blocks that we did not visit because we don't have a full + * understanding of the hive format. + * + * Windows 7 registries often contain a few of these -- not clear + * how serious they are, but don't fail here. + *) +let () + let unvisited = unvisited_blocks () in + IntMap.iter ( + fun offset block -> + match block with + | (_, false, _) -> () (* ignore unused blocks *) + | (_, true, _) -> + eprintf "!-- used block %s is not referenced\n" + filename (print_offset offset) + ) unvisited diff --git a/hivex/visualizer_NT_time.ml b/hivex/visualizer_NT_time.ml new file mode 100644 index 0000000..a752112 --- /dev/null +++ b/hivex/visualizer_NT_time.ml @@ -0,0 +1,30 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + *) + +(* Convert an NT file timestamp to time_t. See: + * http://blogs.msdn.com/oldnewthing/archive/2003/09/05/54806.aspx + * http://support.microsoft.com/kb/167296 + *) +let nt_to_time_t t + let t = Int64.sub t 116444736000000000L in + let t = Int64.div t 10000000L in + Int64.to_float t diff --git a/hivex/visualizer_utils.ml b/hivex/visualizer_utils.ml new file mode 100644 index 0000000..4abf96e --- /dev/null +++ b/hivex/visualizer_utils.ml @@ -0,0 +1,124 @@ +(* Windows Registry reverse-engineering tool. + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * For existing information on the registry format, please refer + * to the following documents. Note they are both incomplete + * and inaccurate in some respects. + *) + +open Bitstring +open ExtString +open Printf + +let failwithf fs = ksprintf failwith fs + +(* Useful function to convert unknown bitstring fragments into + * printable strings. + *) +let rec print_bitstring bits + let str = string_of_bitstring bits in + print_binary_string str +and print_binary_string str + let rec printable = function + | '\x00' -> "\\0" | '\x01' -> "\\1" | '\x02' -> "\\2" | '\x03' -> "\\3" + | '\x04' -> "\\4" | '\x05' -> "\\5" | '\x06' -> "\\6" | '\x07' -> "\\7" + | ('\x08'..'\x31' as c) + | ('\x7f'..'\xff' as c) -> sprintf "\\x%02x" (Char.code c) + | ('\x32'..'\x7e' as c) -> String.make 1 c + and repeat str = function + | n when n <= 0 -> "" + | n -> str ^ repeat str (n-1) + in + let chars = String.explode str in + let rec loop = function + | [] -> [] + | x :: (y :: _ as ys) when x = y -> + let (nr, _), ys + match loop ys with [] -> assert false | a :: b -> a, b in + (nr+1, x) :: ys + | x :: ys -> (1, x) :: loop ys + in + let frags = loop chars in + let frags + List.map (function + | (nr, x) when nr <= 4 -> repeat (printable x) nr + | (nr, x) -> sprintf "%s<%d times>" (printable x) nr + ) frags in + "\"" ^ String.concat "" frags ^ "\"" + +(* Convert an offset from the file to an offset. The only special + * thing is that 0xffffffff in the file is used as a kind of "NULL + * pointer". We map these null values to -1. + *) +let get_offset = function + | 0xffffffff_l -> -1 + | i -> Int32.to_int i + +(* Print an offset. *) +let print_offset = function + | -1 -> "NULL" + | i -> sprintf "@%08x" i + +(* Print time. *) +let print_time t + let tm = Unix.gmtime t in + sprintf "%04d-%02d-%02d %02d:%02d:%02d" + (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday + tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec + +(* Print UTF16LE. 
*) +let print_utf16 str + let n = String.length str in + if n land 1 <> 0 then + print_binary_string str + else ( + let rec loop i + if i < n-1 then ( + let c1 = Char.code (str.[i]) in + let c2 = Char.code (str.[i+1]) in + if c1 <> 0 || c2 <> 0 then ( + (* Well, this doesn't print non-7bit-ASCII ... *) + let c + if c2 = 0 then String.make 1 (Char.chr c1) + else sprintf "\\u%04d" (c2 * 256 + c1) in + c :: loop (i+2) + ) else [] + ) else [] + in + let frags = loop 0 in + "L\"" ^ String.concat "" frags ^ "\"" + ) + +(* A map of int -> anything. *) +module IntMap + Map.Make (struct type t = int let compare = Pervasives.compare end) + +(* Print registry vk-record type field. *) +let print_vk_type = function + | 0 -> "NONE" + | 1 -> "SZ" + | 2 -> "EXPAND_SZ" + | 3 -> "BINARY" + | 4 -> "DWORD" + | 5 -> "DWORD_BIG_ENDIAN" + | 6 -> "LINK" + | 7 -> "MULTI_SZ" + | 8 -> "RESOURCE_LiST" + | 9 -> "FULL_RESOURCE_DESCRIPTOR" + | 10 -> "RESOURCE_REQUIREMENTS_LIST" + | 11 -> "QWORD" + | i -> sprintf "UNKNOWN_VK_TYPE_%d" i diff --git a/m4/.gitignore b/m4/.gitignore index 01c8e2d..3f1a5a4 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -143,3 +143,4 @@ xsize.m4 /strtoll.m4 /strtoul.m4 /strtoull.m4 +/xstrtol.m4 diff --git a/po/POTFILES.in b/po/POTFILES.in index 85892e8..5a27d5c 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -80,6 +80,8 @@ fish/tilde.c fish/time.c fuse/dircache.c fuse/guestmount.c +hivex/example1.c +hivex/example2.c hivex/hivex.c hivex/hivexget.c hivex/hivexml.c -- 1.6.5.2
Apparently Analogous Threads
- [PATCH 0/7] Prepare for adding write support to hivex (windows registry) library
- [PATCH 0/12] Add support for writing to hive files
- [PATCH febootstrap 0/8] Add support for building an ext2-based appliance
- [hivex PATCH 0/5] Fix various uninitialized data problems in hivex.
- [PATCH 0/8 v2 DISCUSSION ONLY] Connecting to live virtual machines