Jan Beulich
2009-Nov-05 14:24 UTC
[Xen-devel] [PATCH] x86/dom0: support bzip2 and lzma compressed bzImage payloads
This matches functionality in the tools already supporting the same for DomU-s. Code taken from Linux 2.6.32-rc and adjusted as little as possible to be usable in Xen. The question is whether, particularly for non-Linux Dom0-s, plain ELF images compressed by bzip2 or lzma should also be supported. Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2009-10-27.orig/xen/arch/x86/bzimage.c 2009-10-07 13:31:36.000000000 +0200 +++ 2009-10-27/xen/arch/x86/bzimage.c 2009-11-05 12:23:05.000000000 +0100 @@ -4,6 +4,7 @@ #include <xen/mm.h> #include <xen/string.h> #include <xen/types.h> +#include <xen/decompress.h> #include <asm/bzimage.h> #define HEAPORDER 3 @@ -93,28 +94,38 @@ static __init void flush_window(void) outcnt = 0; } -static __init int gzip_length(char *image, unsigned long image_len) +static __init unsigned long output_length(char *image, unsigned long image_len) { return *(uint32_t *)&image[image_len - 4]; } -static __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len) +static __init int gzip_check(char *image, unsigned long image_len) { - char *image = *_image_start; - int rc; - unsigned char magic0 = (unsigned char)image[0]; - unsigned char magic1 = (unsigned char)image[1]; + unsigned char magic0, magic1; - if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) ) + if ( image_len < 2 ) return 0; + magic0 = (unsigned char)image[0]; + magic1 = (unsigned char)image[1]; + + return (magic0 == 0x1f) && ((magic1 == 0x8b) || (magic1 == 0x9e)); +} + +static __init int perform_gunzip(char *output, char *image, unsigned long image_len) +{ + int rc; + + if ( !gzip_check(image, image_len) ) + return 1; + window = (unsigned char *)output; free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER, 0); free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER); inbuf = (unsigned char *)image; - insize = *image_len; + insize = image_len; inptr = 0; makecrc(); @@ -125,8 +136,6 @@ static __init int perform_gunzip(char * } else { - 
*_image_start = (char *)window; - *image_len = gzip_length(image, *image_len); rc = 0; } @@ -203,9 +212,12 @@ int __init bzimage_headroom(char *image_ img = image_start + (hdr->setup_sects+1) * 512; img += hdr->payload_offset; - headroom = gzip_length(img, hdr->payload_length); - headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */ - headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */ + headroom = output_length(img, hdr->payload_length); + if (gzip_check(img, hdr->payload_length)) { + headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */ + headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */ + } else + headroom += hdr->payload_length; headroom = (headroom + 4095) & ~4095; return headroom; @@ -215,6 +227,7 @@ int __init bzimage_parse(char *image_bas { struct setup_header *hdr = (struct setup_header *)(*image_start); int err = bzimage_check(hdr, *image_len); + unsigned long output_len; if (err < 1) return err; @@ -224,11 +237,18 @@ int __init bzimage_parse(char *image_bas *image_start += (hdr->setup_sects+1) * 512; *image_start += hdr->payload_offset; *image_len = hdr->payload_length; + output_len = output_length(*image_start, *image_len); - if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 ) - return err; + if ( (err = perform_gunzip(image_base, *image_start, *image_len)) > 0 ) + err = decompress(*image_start, *image_len, image_base); + + if ( !err ) + { + *image_start = image_base; + *image_len = output_len; + } - return 0; + return err > 0 ? 
0 : err; } /* --- 2009-10-27.orig/xen/common/Makefile 2009-05-27 13:54:07.000000000 +0200 +++ 2009-10-27/xen/common/Makefile 2009-11-05 12:26:53.000000000 +0100 @@ -35,6 +35,8 @@ obj-y += radix-tree.o obj-y += rbtree.o obj-y += lzo.o +obj-$(CONFIG_X86) += decompress.o bunzip2.o unlzma.o + obj-$(perfc) += perfc.o obj-$(crash_debug) += gdbstub.o obj-$(xenoprof) += xenoprof.o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-10-27/xen/common/bunzip2.c 2009-11-05 12:44:51.000000000 +0100 @@ -0,0 +1,726 @@ +/* vi: set sw = 4 ts = 4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + This code is licensed under the LGPLv2: + LGPL (http://www.gnu.org/copyleft/lgpl.html +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_unzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization in the name of the woman I loved, who + passed away Feb. 12, 2003. + + In memory of Toni W. Hagan + + Hospice of Acadiana, Inc. 
+ 2600 Johnston St., Suite 200 + Lafayette, LA 70503-3240 + + Phone (337) 232-1234 or 1-800-738-2226 + Fax (337) 232-1297 + + http://www.hospiceacadiana.com/ + + Manuel + */ + +/* + Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) +*/ + +#include "decompress.h" + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define BZIP2_IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinal value. */ + int limit[MAX_HUFCODE_BITS+1]; + int base[MAX_HUFCODE_BITS]; + int permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + memory that persists between calls to bunzip */ +struct bunzip_data { + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; + /* I/O tracking data (file handles, buffers, positions, etc.) 
*/ + int (*fill)(void*, unsigned int); + int inbufCount, inbufPos /*, outbufPos*/; + unsigned char *inbuf /*,*outbuf*/; + unsigned int inbufBitCount, inbufBits; + /* The CRC values stored in the block header and calculated from the + data */ + unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; + /* Intermediate buffer and its size (in bytes) */ + unsigned int *dbuf, dbufSize; + /* These things are a bit too big to go on the stack */ + unsigned char selectors[32768]; /* nSelectors = 15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ + int io_error; /* non-zero if we have IO error */ +}; + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) +{ + unsigned int bits = 0; + + /* If we need to get more data from the byte buffer, do so. + (Loop getting one byte at a time to enforce endianness and avoid + unaligned access.) */ + while (bd->inbufBitCount < bits_wanted) { + /* If we need to read more data from file into byte buffer, do + so */ + if (bd->inbufPos == bd->inbufCount) { + if (bd->io_error) + return 0; + bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); + if (bd->inbufCount <= 0) { + bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; + return 0; + } + bd->inbufPos = 0; + } + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bd->inbufBitCount >= 24) { + bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); + bits_wanted -= bd->inbufBitCount; + bits <<= bits_wanted; + bd->inbufBitCount = 0; + } + /* Grab next 8 bits of input from buffer. 
*/ + bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + } + /* Calculate result */ + bd->inbufBitCount -= bits_wanted; + bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ + +static int INIT get_next_block(struct bunzip_data *bd) +{ + struct group_data *hufGroup = NULL; + int *base = NULL; + int *limit = NULL; + int dbufCount, nextSym, dbufSize, groupCount, selector, + i, j, k, t, runPos, symCount, symTotal, nSelectors, + byteCount[256]; + unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + unsigned int *dbuf, origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) + return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) + return RETVAL_NOT_BZIP_DATA; + /* We can add support for blockRandomised if anybody complains. + There was some code for this in busybox 1.0.0-pre3, but nobody ever + noticed that it didn''t actually work. */ + if (get_bits(bd, 1)) + return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if (origPtr > dbufSize) + return RETVAL_DATA_ERROR; + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the + symbols back to the corresponding bytes. 
*/ + t = get_bits(bd, 16); + symTotal = 0; + for (i = 0; i < 16; i++) { + if (t&(1 << (15-i))) { + k = get_bits(bd, 16); + for (j = 0; j < 16; j++) + if (k&(1 << (15-j))) + symToByte[symTotal++] = (16*i)+j; + } + } + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + /* nSelectors: Every GROUP_SIZE many symbols we select a new + Huffman coding group. Read in the group selector list, + which is stored as MTF encoded bit runs. (MTF = Move To + Front, as each value is used it''s moved to the start of the + list.) */ + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + for (i = 0; i < nSelectors; i++) { + /* Get next value */ + for (j = 0; get_bits(bd, 1); j++) + if (j >= groupCount) + return RETVAL_DATA_ERROR; + /* Decode MTF to get the next selector */ + uc = mtfSymbol[j]; + for (; j; j--) + mtfSymbol[j] = mtfSymbol[j-1]; + mtfSymbol[0] = selectors[i] = uc; + } + /* Read the Huffman coding tables for each group, which code + for symTotal literal symbols, plus two run symbols (RUNA, + RUNB) */ + symCount = symTotal+2; + for (j = 0; j < groupCount; j++) { + unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp; + /* Read Huffman code lengths for each symbol. They''re + stored in a way similar to mtf; record a starting + value for the first symbol, and an offset from the + previous value for everys symbol after that. + (Subtracting 1 before the loop and then adding it + back at the end is an optimization that makes the + test inside the loop simpler: symbol length 0 + becomes negative, so an unsigned inequality catches + it.) */ + t = get_bits(bd, 5)-1; + for (i = 0; i < symCount; i++) { + for (;;) { + if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. 
Else + second bit indicates whether to + increment or decrement the value. + Optimization: grab 2 bits and unget + the second if the first was 0. */ + + k = get_bits(bd, 2); + if (k < 2) { + bd->inbufBitCount++; + break; + } + /* Add one if second bit 1, else + * subtract 1. Avoids if/else */ + t += (((k+1)&2)-1); + } + /* Correct for the initial -1, to get the + * final symbol length */ + length[i] = t+1; + } + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) + maxLen = length[i]; + else if (length[i] < minLen) + minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from + * length[]. + * + * permute[] is the lookup table for converting + * Huffman coded symbols into decoded symbols. base[] + * is the amount to subtract from the value of a + * Huffman symbol of a given length when using + * permute[]. + * + * limit[] indicates the largest numerical value a + * symbol with a given number of bits can have. This + * is how the Huffman codes can vary in length: each + * code with a value > limit[length] needs another + * bit. + */ + hufGroup = bd->groups+j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + /* Note that minLen can''t be smaller than 1, so we + adjust the base and limit array pointers so we''re + not always wasting the first entry. We do this + again when using them (during symbol decoding).*/ + base = hufGroup->base-1; + limit = hufGroup->limit-1; + /* Calculate permute[]. Concurently, initialize + * temp[] and limit[]. 
*/ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + temp[i] = limit[i] = 0; + for (t = 0; t < symCount; t++) + if (length[t] == i) + hufGroup->permute[pp++] = t; + } + /* Count symbols coded for at each bit length */ + for (i = 0; i < symCount; i++) + temp[length[i]]++; + /* Calculate limit[] (the largest symbol-coding value + *at each bit length, which is (previous limit << + *1)+symbols at this level), and base[] (number of + *symbols to ignore at each bit length, which is limit + *minus the cumulative count of symbols coded for + *already). */ + pp = t = 0; + for (i = minLen; i < maxLen; i++) { + pp += temp[i]; + /* We read the largest possible symbol size + and then unget bits after determining how + many we need, and those extra bits could be + set to anything. (They''re noise from + future symbols.) At each level we''re + really only interested in the first few + bits, so here we set all the trailing + to-be-ignored bits to 1 so they don''t + affect the value > limit[length] + comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + base[i+1] = pp-(t += temp[i]); + } + limit[maxLen+1] = INT_MAX; /* Sentinal value for + * reading next sym. */ + limit[maxLen] = pp+temp[maxLen]-1; + base[minLen] = 0; + } + /* We''ve finished reading and digesting the block header. Now + read this block''s Huffman coded symbols from the file and + undo the Huffman coding and run length encoding, saving the + result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To + * Front table */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (unsigned char)i; + } + /* Loop through compressed symbols. */ + runPos = dbufCount = symCount = selector = 0; + for (;;) { + /* Determine which Huffman coding group to use. 
*/ + if (!(symCount--)) { + symCount = GROUP_SIZE-1; + if (selector >= nSelectors) + return RETVAL_DATA_ERROR; + hufGroup = bd->groups+selectors[selector++]; + base = hufGroup->base-1; + limit = hufGroup->limit-1; + } + /* Read next Huffman-coded symbol. */ + /* Note: It is far cheaper to read maxLen bits and + back up than it is to read minLen bits and then an + additional bit at a time, testing as we go. + Because there is a trailing last block (with file + CRC), there is no danger of the overread causing an + unexpected EOF for a valid compressed file. As a + further optimization, we do the read inline + (falling back to a call to get_bits if the buffer + runs dry). The following (up to got_huff_bits:) is + equivalent to j = get_bits(bd, hufGroup->maxLen); + */ + while (bd->inbufBitCount < hufGroup->maxLen) { + if (bd->inbufPos == bd->inbufCount) { + j = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = + (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount -= hufGroup->maxLen; + j = (bd->inbufBits >> bd->inbufBitCount)& + ((1 << hufGroup->maxLen)-1); +got_huff_bits: + /* Figure how how many bits are in next symbol and + * unget extras */ + i = hufGroup->minLen; + while (j > limit[i]) + ++i; + bd->inbufBitCount += (hufGroup->maxLen - i); + /* Huffman decode value to get nextSym (with bounds checking) */ + if ((i > hufGroup->maxLen) + || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) + >= MAX_SYMBOLS)) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[j]; + /* We have now decoded the symbol, which indicates + either a new literal byte, or a repeated run of the + most recent literal byte. First, check if nextSym + indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. 
*/ + if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ + /* If this is the start of a new run, zero out + * counter */ + if (!runPos) { + runPos = 1; + t = 0; + } + /* Neat trick that saves 1 symbol: instead of + or-ing 0 or 1 at each bit position, add 1 + or 2 instead. For example, 1011 is 1 << 0 + + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 + + 1 << 2. You can make any bit pattern + that way using 1 less symbol than the basic + or 0/1 method (except all bits 0, which + would use no symbols, but a run of length 0 + doesn''t mean anything in this context). + Thus space is saved. */ + t += (runPos << nextSym); + /* +runPos if RUNA; +2*runPos if RUNB */ + + runPos <<= 1; + continue; + } + /* When we hit the first non-run symbol after a run, + we now know how many times to repeat the last + literal, so append that many copies to our buffer + of decoded symbols (dbuf) now. (The last literal + used is the one at the head of the mtfSymbol + array.) */ + if (runPos) { + runPos = 0; + if (dbufCount+t >= dbufSize) + return RETVAL_DATA_ERROR; + + uc = symToByte[mtfSymbol[0]]; + byteCount[uc] += t; + while (t--) + dbuf[dbufCount++] = uc; + } + /* Is this the terminating symbol? */ + if (nextSym > symTotal) + break; + /* At this point, nextSym indicates a new literal + character. Subtract one to get the position in the + MTF array at which this literal is currently to be + found. (Note that the result can''t be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another + instance of the first symbol in the mtf array, + position 0, would have been handled as part of a + run above. Therefore 1 unused mtf position minus 2 + non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) + return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + /* Adjust the MTF array. 
Since we typically expect to + *move only a small number of symbols, and are bound + *by 256 in any case, using memmove here would + *typically be bigger and slower due to function call + *overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (unsigned int)uc; + } + /* At this point, we''ve read all the Huffman-coded symbols + (and repeated runs) for this block from the input stream, + and decoded them into the intermediate buffer. There are + dbufCount many decoded bytes in dbuf[]. Now undo the + Burrows-Wheeler transform on dbuf. See + http://dogma.net/markn/articles/bwt/bwt.htm + */ + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + k = j+byteCount[i]; + byteCount[i] = j; + j = k; + } + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uc = (unsigned char)(dbuf[i] & 0xff); + dbuf[byteCount[uc]] |= (i << 8); + byteCount[uc]++; + } + /* Decode first byte by hand to initialize "previous" byte. + Note that it doesn''t get output, and if the first three + characters are identical it doesn''t qualify as a run (hence + writeRunCountdown = 5). */ + if (dbufCount) { + if (origPtr >= dbufCount) + return RETVAL_DATA_ERROR; + bd->writePos = dbuf[origPtr]; + bd->writeCurrent = (unsigned char)(bd->writePos&0xff); + bd->writePos >>= 8; + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo burrows-wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd =-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). 
If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. +*/ + +static int INIT read_bunzip(struct bunzip_data *bd, unsigned char *outbuf, int len) +{ + const unsigned int *dbuf; + int pos, xcurrent, previous, gotcount; + + /* If last read was short due to end of file, return last block now */ + if (bd->writeCount < 0) + return bd->writeCount; + + gotcount = 0; + dbuf = bd->dbuf; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven''t + Huffman-decoded a block into the intermediate buffer yet). */ + + if (bd->writeCopies) { + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + /* Loop outputting bytes */ + for (;;) { + /* If the output buffer is full, snapshot + * state and return */ + if (gotcount >= len) { + bd->writePos = pos; + bd->writeCurrent = xcurrent; + bd->writeCopies++; + return len; + } + /* Write next byte into output buffer, updating CRC */ + outbuf[gotcount++] = xcurrent; + bd->writeCRC = (((bd->writeCRC) << 8) + ^bd->crc32Table[((bd->writeCRC) >> 24) + ^xcurrent]); + /* Loop now if we''re outputting multiple + * copies of this byte */ + if (bd->writeCopies) { + --bd->writeCopies; + continue; + } +decode_next_byte: + if (!bd->writeCount--) + break; + /* Follow sequence vector to undo + * Burrows-Wheeler transform */ + previous = xcurrent; + pos = dbuf[pos]; + xcurrent = pos&0xff; + pos >>= 8; + /* After 3 consecutive copies of the same + byte, the 4th is a repeat count. 
We count + down from 4 instead *of counting up because + testing for non-zero is faster */ + if (--bd->writeRunCountdown) { + if (xcurrent != previous) + bd->writeRunCountdown = 4; + } else { + /* We have a repeated run, this byte + * indicates the count */ + bd->writeCopies = xcurrent; + xcurrent = previous; + bd->writeRunCountdown = 5; + /* Sometimes there are just 3 bytes + * (run length 0) */ + if (!bd->writeCopies) + goto decode_next_byte; + /* Subtract the 1 copy we''d output + * anyway to get extras */ + --bd->writeCopies; + } + } + /* Decompression of this block completed successfully */ + bd->writeCRC = ~bd->writeCRC; + bd->totalCRC = ((bd->totalCRC << 1) | + (bd->totalCRC >> 31)) ^ bd->writeCRC; + /* If this block had a CRC error, force file level CRC error. */ + if (bd->writeCRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC+1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next + * block of input */ + /* (previous is just a convenient unused temp variable here) */ + previous = get_next_block(bd); + if (previous) { + bd->writeCount = previous; + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; + } + bd->writeCRC = 0xffffffffUL; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + goto decode_next_byte; +} + +static int INIT nofill(void *buf, unsigned int len) +{ + return -1; +} + +/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. 
*/ +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, + int (*fill)(void*, unsigned int)) +{ + struct bunzip_data *bd; + unsigned int i, j, c; + const unsigned int BZh0 = + (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) + +(((unsigned int)'h') << 8)+(unsigned int)'0'; + + /* Figure out how much data to allocate */ + i = sizeof(struct bunzip_data); + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = malloc(i); + memset(bd, 0, sizeof(struct bunzip_data)); + /* Setup input buffer */ + bd->inbuf = inbuf; + bd->inbufCount = len; + if (fill != NULL) + bd->fill = fill; + else + bd->fill = nofill; + + /* Init the CRC32 table (big endian) */ + for (i = 0; i < 256; i++) { + c = i << 24; + for (j = 8; j; j--) + c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + bd->crc32Table[i] = c; + } + + /* Ensure that file starts with "BZh['1'-'9']." */ + i = get_bits(bd, 32); + if (((unsigned int)(i-BZh0-1)) >= 9) + return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + bd->dbufSize = 100000*(i-BZh0); + + bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + return RETVAL_OK; +} + +/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, + not end of file.) 
*/ +STATIC int INIT bunzip2(unsigned char *buf, unsigned int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + unsigned int *pos, + void(*error_fn)(const char *x)) +{ + struct bunzip_data *bd; + int i = -1; + unsigned char *inbuf; + + set_error_fn(error_fn); + if (flush) + outbuf = malloc(BZIP2_IOBUF_SIZE); + + if (!outbuf) { + error("Could not allocate output bufer"); + return -1; + } + if (buf) + inbuf = buf; + else + inbuf = malloc(BZIP2_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + i = start_bunzip(&bd, inbuf, len, fill); + if (!i) { + for (;;) { + i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); + if (i <= 0) + break; + if (!flush) + outbuf += i; + else + if (i != flush(outbuf, i)) { + i = RETVAL_UNEXPECTED_OUTPUT_EOF; + break; + } + } + } + /* Check CRC and release memory */ + if (i == RETVAL_LAST_BLOCK) { + if (bd->headerCRC != bd->totalCRC) + error("Data integrity error when decompressing."); + else + i = RETVAL_OK; + } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { + error("Compressed file ends unexpectedly"); + } + if (bd->dbuf) + large_free(bd->dbuf); + if (pos) + *pos = bd->inbufPos; + free(bd); + if (!buf) + free(inbuf); +exit_0: + if (flush) + free(outbuf); + return i; +} --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-10-27/xen/common/decompress.c 2009-11-05 12:27:10.000000000 +0100 @@ -0,0 +1,27 @@ +#include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/string.h> +#include <xen/decompress.h> + +static void __init error(const char *msg) +{ + printk("%s\n", msg); +} + +int __init decompress(void *inbuf, unsigned int len, void *outbuf) +{ +#if 0 /* Not needed here yet. 
*/ + if ( len >= 2 && + (!memcmp(inbuf, "\037\213", 2) || !memcmp(inbuf, "\037\236", 2)) ) + return gunzip(inbuf, len, NULL, NULL, outbuf, NULL, error); +#endif + + if ( len >= 3 && !memcmp(inbuf, "\x42\x5a\x68", 3) ) + return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error); + + if ( len >= 2 && !memcmp(inbuf, "\135\000", 2) ) + return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error); + + return 1; +} --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-10-27/xen/common/decompress.h 2009-11-05 15:21:52.000000000 +0100 @@ -0,0 +1,19 @@ +#include <xen/config.h> +#include <xen/cache.h> +#include <xen/decompress.h> +#include <xen/init.h> +#include <xen/string.h> +#include <xen/types.h> +#include <xen/xmalloc.h> + +#define STATIC +#define INIT __init + +static void(*__initdata error)(const char *); +#define set_error_fn(x) error = x; + +#define malloc xmalloc_bytes +#define free xfree + +#define large_malloc xmalloc_bytes +#define large_free xfree --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-10-27/xen/common/unlzma.c 2009-11-05 12:45:37.000000000 +0100 @@ -0,0 +1,647 @@ +/* Lzma decompressor for Linux kernel. Shamelessly snarfed + * from busybox 1.1.1 + * + * Linux kernel adaptation + * Copyright (C) 2006 Alain < alain@knaff.lu > + * + * Based on small lzma deflate implementation/Small range coder + * implementation for lzma. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + * + * Copyrights of the parts, see headers below. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "decompress.h" + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +static long long INIT read_int(unsigned char *ptr, int size) +{ + int i; + long long ret = 0; + + for (i = 0; i < size; i++) + ret = (ret << 8) | ptr[size-i-1]; + return ret; +} + +#define ENDIAN_CONVERT(x) \ + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) + + +/* Small range coder implementation for lzma. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (c) 1999-2005 Igor Pavlov + */ + +#include <xen/compiler.h> + +#define LZMA_IOBUF_SIZE 0x10000 + +struct rc { + int (*fill)(void*, unsigned int); + uint8_t *ptr; + uint8_t *buffer; + uint8_t *buffer_end; + int buffer_size; + uint32_t code; + uint32_t range; + uint32_t bound; +}; + + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + +/* Called twice: once at startup and once in rc_normalize() */ +static void INIT rc_read(struct rc *rc) +{ + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); + if (rc->buffer_size <= 0) + error("unexpected EOF"); + rc->ptr = rc->buffer; + rc->buffer_end = rc->buffer + rc->buffer_size; +} + +/* Called once */ +static inline void INIT rc_init(struct rc *rc, + int (*fill)(void*, unsigned int), + unsigned char *buffer, int buffer_size) +{ + if (fill) + rc->fill = fill; + else + rc->fill = nofill; + 
rc->buffer = (uint8_t *)buffer; + rc->buffer_size = buffer_size; + rc->buffer_end = rc->buffer + rc->buffer_size; + rc->ptr = rc->buffer; + + rc->code = 0; + rc->range = 0xFFFFFFFF; +} + +static inline void INIT rc_init_code(struct rc *rc) +{ + int i; + + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } +} + + +/* Called once. TODO: bb_maybe_free() */ +static inline void INIT rc_free(struct rc *rc) +{ + free(rc->buffer); +} + +/* Called twice, but one callsite is in inline''d rc_is_bit_0_helper() */ +static void INIT rc_do_normalize(struct rc *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static inline void INIT rc_normalize(struct rc *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) + rc_do_normalize(rc); +} + +/* Called 9 times */ +/* Why rc_is_bit_0_helper exists? + *Because we want to always expose (rc->code < rc->bound) to optimizer + */ +static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static inline int INIT 
rc_direct_bit(struct rc *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static inline void INIT +rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + */ + + +struct lzma_header { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) ; + + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LZMA_NUM_POS_BITS_MAX 4 + +#define LZMA_LEN_NUM_LOW_BITS 3 +#define LZMA_LEN_NUM_MID_BITS 3 +#define LZMA_LEN_NUM_HIGH_BITS 8 + +#define LZMA_LEN_CHOICE 0 +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) +#define LZMA_LEN_MID (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) + +#define LZMA_NUM_STATES 12 +#define LZMA_NUM_LIT_STATES 7 + +#define LZMA_START_POS_MODEL_INDEX 4 +#define LZMA_END_POS_MODEL_INDEX 14 +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) + +#define LZMA_NUM_POS_SLOT_BITS 6 +#define LZMA_NUM_LEN_TO_POS_STATES 4 + +#define LZMA_NUM_ALIGN_BITS 4 + +#define LZMA_MATCH_MIN_LEN 2 + +#define LZMA_IS_MATCH 0 +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) +#define 
LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) +#define LZMA_ALIGN (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) +#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) + + +struct writer { + uint8_t *buffer; + uint8_t previous_byte; + size_t buffer_pos; + int bufsize; + size_t global_pos; + int(*flush)(void*, unsigned int); + struct lzma_header *header; +}; + +struct cstate { + int state; + uint32_t rep0, rep1, rep2, rep3; +}; + +static inline size_t INIT get_pos(struct writer *wr) +{ + return + wr->global_pos + wr->buffer_pos; +} + +static inline uint8_t INIT peek_old_byte(struct writer *wr, + uint32_t offs) +{ + if (!wr->flush) { + int32_t pos; + while (offs > wr->header->dict_size) + offs -= wr->header->dict_size; + pos = wr->buffer_pos - offs; + return wr->buffer[pos]; + } else { + uint32_t pos = wr->buffer_pos - offs; + while (pos >= wr->header->dict_size) + pos += wr->header->dict_size; + return wr->buffer[pos]; + } + +} + +static inline void INIT write_byte(struct writer *wr, uint8_t byte) +{ + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { + wr->buffer_pos = 0; + wr->global_pos += wr->header->dict_size; + wr->flush((char *)wr->buffer, wr->header->dict_size); + } +} + + +static inline void INIT copy_byte(struct writer *wr, uint32_t offs) +{ + write_byte(wr, peek_old_byte(wr, offs)); +} + +static inline void INIT copy_bytes(struct writer *wr, + uint32_t rep0, int len) +{ + do { + copy_byte(wr, rep0); + len--; + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); +} + +static inline void INIT process_bit0(struct writer 
*wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob, + int lc, uint32_t literal_pos_mask) { + int mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE + * (((get_pos(wr) & literal_pos_mask) << lc) + + (wr->previous_byte >> (8 - lc)))) + ); + + if (cst->state >= LZMA_NUM_LIT_STATES) { + int match_byte = peek_old_byte(wr, cst->rep0); + do { + int bit; + uint16_t *prob_lit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + if (rc_get_bit(rc, prob_lit, &mi)) { + if (!bit) + break; + } else { + if (bit) + break; + } + } while (mi < 0x100); + } + while (mi < 0x100) { + uint16_t *prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + write_byte(wr, mi); + if (cst->state < 4) + cst->state = 0; + else if (cst->state < 10) + cst->state -= 3; + else + cst->state -= 6; +} + +static inline void INIT process_bit1(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob) { + int offset; + uint16_t *prob_len; + int num_bits; + int len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + cst->rep3 = cst->rep2; + cst->rep2 = cst->rep1; + cst->rep1 = cst->rep0; + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (cst->state << + LZMA_NUM_POS_BITS_MAX) + + pos_state); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 
+ 9 : 11; + copy_byte(wr, cst->rep0); + return; + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep2; + } else { + rc_update_bit_1(rc, prob); + distance = cst->rep3; + cst->rep3 = cst->rep2; + } + cst->rep2 = cst->rep1; + } + cst->rep1 = cst->rep0; + cst->rep0 = distance; + } + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << + LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << + LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (cst->state < 4) { + int pos_slot; + + cst->state += LZMA_NUM_LIT_STATES; + prob = + p + LZMA_POS_SLOT + + ((len < + LZMA_NUM_LEN_TO_POS_STATES ? 
len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, + LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i, mi; + num_bits = (pos_slot >> 1) - 1; + cst->rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + cst->rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + + cst->rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + cst->rep0 = (cst->rep0 << 1) | + rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + cst->rep0 |= i; + i <<= 1; + } + } else + cst->rep0 = pos_slot; + if (++(cst->rep0) == 0) + return; + } + + len += LZMA_MATCH_MIN_LEN; + + copy_bytes(wr, cst->rep0, len); +} + + + +STATIC inline int INIT unlzma(unsigned char *buf, unsigned int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + unsigned int *posp, + void(*error_fn)(const char *x) + ) +{ + struct lzma_header header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_probs; + struct rc rc; + int i, mi; + struct writer wr; + struct cstate cst; + unsigned char *inbuf; + int ret = -1; + + set_error_fn(error_fn); + + if (buf) + inbuf = buf; + else + inbuf = malloc(LZMA_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + + cst.state = 0; + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; + + wr.header = &header; + wr.flush = flush; + wr.global_pos = 0; + wr.previous_byte = 0; + wr.buffer_pos = 0; + + rc_init(&rc, fill, inbuf, in_len); + + for (i = 0; i < sizeof(header); i++) { + if (rc.ptr >= rc.buffer_end) + rc_read(&rc); + ((unsigned char *)&header)[i] = *rc.ptr++; + } + + if (header.pos >= (9 * 5 * 5)) + error("bad header"); + + mi = 0; + lc = header.pos; + while (lc >= 9) { + 
mi++; + lc -= 9; + } + pb = 0; + lp = mi; + while (lp >= 5) { + pb++; + lp -= 5; + } + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + ENDIAN_CONVERT(header.dict_size); + ENDIAN_CONVERT(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + if (output) + wr.buffer = output; + else { + wr.bufsize = MIN(header.dst_size, header.dict_size); + wr.buffer = large_malloc(wr.bufsize); + } + if (wr.buffer == NULL) + goto exit_1; + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); + if (p == 0) + goto exit_2; + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc_init_code(&rc); + + while (get_pos(&wr) < header.dst_size) { + int pos_state = get_pos(&wr) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(&rc, prob)) + process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask); + else { + process_bit1(&wr, &rc, &cst, p, pos_state, prob); + if (cst.rep0 == 0) + break; + } + } + + if (posp) + *posp = rc.ptr-rc.buffer; + if (wr.flush) + wr.flush(wr.buffer, wr.buffer_pos); + ret = 0; + large_free(p); +exit_2: + if (!output) + large_free(wr.buffer); +exit_1: + if (!buf) + free(inbuf); +exit_0: + return ret; +} --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-10-27/xen/include/xen/decompress.h 2009-11-05 12:27:44.000000000 +0100 @@ -0,0 +1,38 @@ +#ifndef __XEN_GENERIC_H +#define __XEN_GENERIC_H + +typedef int decompress_fn(unsigned char *inbuf, unsigned int len, + int (*fill)(void*, unsigned int), + int (*flush)(void*, unsigned int), + unsigned char *outbuf, unsigned int *posp, + void (*error)(const char *x)); + +/* inbuf - input buffer + * len - len of pre-read data in inbuf + * fill - function to fill inbuf when empty + * flush - function to write out outbuf + * outbuf - output buffer + 
* posp - if non-null, input position (number of bytes read) will be + * returned here + * error - error reporting function + * + * If len != 0, inbuf should contain all the necessary input data, and fill + * should be NULL + * If len = 0, inbuf can be NULL, in which case the decompressor will allocate + * the input buffer. If inbuf != NULL it must be at least XXX_IOBUF_SIZE bytes. + * fill will be called (repeatedly...) to read data, at most XXX_IOBUF_SIZE + * bytes should be read per call. Replace XXX with the appropriate decompressor + * name, i.e. LZMA_IOBUF_SIZE. + * + * If flush = NULL, outbuf must be large enough to buffer all the expected + * output. If flush != NULL, the output buffer will be allocated by the + * decompressor (outbuf = NULL), and the flush function will be called to + * flush the output buffer at the appropriate time (decompressor and stream + * dependent). + */ + +decompress_fn bunzip2, unlzma; + +int decompress(void *inbuf, unsigned int len, void *outbuf); + +#endif _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2009-Nov-06 08:56 UTC
Re: [Xen-devel] [PATCH] x86/dom0: support bzip2 and lzma compressed bzImage payloads
On Thu, 2009-11-05 at 14:24 +0000, Jan Beulich wrote:
> This matches functionality in the tools already supporting the same for
> DomU-s.
>
> Code taken from Linux 2.6.32-rc and adjusted as little as possible to
> be usable in Xen.

Looks good to me. The new files FOO.c are based on lib/decompress_FOO.c from Linux? Might be worth a comment in at least the changelog to aid the next person to come along who wants to resync them. (and why rename them?)

> The question is whether, particularly for non-Linux Dom0-s, plain ELF
> images compressed by bzip2 or lzma should also be supported.

It should be fairly low cost to do so, so why not? Is there a reason to treat gzip differently to bzip2/lzma? i.e. why not route it through decompress.c as well? Perhaps that would involve updating xen/common/inflate.c but that is already just a fork of what is now in lib/decompress_inflate.c in Linux so it should be pretty trivial.

> Signed-off-by: Jan Beulich <jbeulich@novell.com>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

> > --- 2009-10-27.orig/xen/arch/x86/bzimage.c 2009-10-07 13:31:36.000000000 +0200 > +++ 2009-10-27/xen/arch/x86/bzimage.c 2009-11-05 12:23:05.000000000 +0100 > @@ -4,6 +4,7 @@ > #include <xen/mm.h> > #include <xen/string.h> > #include <xen/types.h> > +#include <xen/decompress.h> > #include <asm/bzimage.h> > > #define HEAPORDER 3 > @@ -93,28 +94,38 @@ static __init void flush_window(void) > outcnt = 0; > } > > -static __init int gzip_length(char *image, unsigned long image_len) > +static __init unsigned long output_length(char *image, unsigned long image_len) > { > return *(uint32_t *)&image[image_len - 4]; > } > > -static __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len) > +static __init int gzip_check(char *image, unsigned long image_len) > { > - char *image = *_image_start; > - int rc; > - unsigned char magic0 = (unsigned char)image[0]; > - unsigned char magic1 = (unsigned char)image[1]; > + unsigned char magic0, magic1; > > - if 
( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) ) > + if ( image_len < 2 ) > return 0; > > + magic0 = (unsigned char)image[0]; > + magic1 = (unsigned char)image[1]; > + > + return (magic0 == 0x1f) && ((magic1 == 0x8b) || (magic1 == 0x9e)); > +} > + > +static __init int perform_gunzip(char *output, char *image, unsigned long image_len) > +{ > + int rc; > + > + if ( !gzip_check(image, image_len) ) > + return 1; > + > window = (unsigned char *)output; > > free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER, 0); > free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER); > > inbuf = (unsigned char *)image; > - insize = *image_len; > + insize = image_len; > inptr = 0; > > makecrc(); > @@ -125,8 +136,6 @@ static __init int perform_gunzip(char * > } > else > { > - *_image_start = (char *)window; > - *image_len = gzip_length(image, *image_len); > rc = 0; > } > > @@ -203,9 +212,12 @@ int __init bzimage_headroom(char *image_ > img = image_start + (hdr->setup_sects+1) * 512; > img += hdr->payload_offset; > > - headroom = gzip_length(img, hdr->payload_length); > - headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */ > - headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */ > + headroom = output_length(img, hdr->payload_length); > + if (gzip_check(img, hdr->payload_length)) { > + headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */ > + headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */ > + } else > + headroom += hdr->payload_length; > headroom = (headroom + 4095) & ~4095; > > return headroom; > @@ -215,6 +227,7 @@ int __init bzimage_parse(char *image_bas > { > struct setup_header *hdr = (struct setup_header *)(*image_start); > int err = bzimage_check(hdr, *image_len); > + unsigned long output_len; > > if (err < 1) > return err; > @@ -224,11 +237,18 @@ int __init bzimage_parse(char *image_bas > *image_start += (hdr->setup_sects+1) * 512; > *image_start += hdr->payload_offset; > 
*image_len = hdr->payload_length; > + output_len = output_length(*image_start, *image_len); > > - if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 ) > - return err; > + if ( (err = perform_gunzip(image_base, *image_start, *image_len)) > 0 ) > + err = decompress(*image_start, *image_len, image_base); > + > + if ( !err ) > + { > + *image_start = image_base; > + *image_len = output_len; > + } > > - return 0; > + return err > 0 ? 0 : err; > } > > /* > --- 2009-10-27.orig/xen/common/Makefile 2009-05-27 13:54:07.000000000 +0200 > +++ 2009-10-27/xen/common/Makefile 2009-11-05 12:26:53.000000000 +0100 > @@ -35,6 +35,8 @@ obj-y += radix-tree.o > obj-y += rbtree.o > obj-y += lzo.o > > +obj-$(CONFIG_X86) += decompress.o bunzip2.o unlzma.o > + > obj-$(perfc) += perfc.o > obj-$(crash_debug) += gdbstub.o > obj-$(xenoprof) += xenoprof.o > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ 2009-10-27/xen/common/bunzip2.c 2009-11-05 12:44:51.000000000 +0100 > @@ -0,0 +1,726 @@ > +/* vi: set sw = 4 ts = 4: */ > +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). > + > + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), > + which also acknowledges contributions by Mike Burrows, David Wheeler, > + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, > + Robert Sedgewick, and Jon L. Bentley. > + > + This code is licensed under the LGPLv2: > + LGPL (http://www.gnu.org/copyleft/lgpl.html > +*/ > + > +/* > + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). > + > + More efficient reading of Huffman codes, a streamlined read_bunzip() > + function, and various other tweaks. In (limited) tests, approximately > + 20% faster than bzcat on x86 and about 10% faster on arm. > + > + Note that about 2/3 of the time is spent in read_unzip() reversing > + the Burrows-Wheeler transformation. Much of that time is delay > + resulting from cache misses. 
> + > + I would ask that anyone benefiting from this work, especially those > + using it in commercial products, consider making a donation to my local > + non-profit hospice organization in the name of the woman I loved, who > + passed away Feb. 12, 2003. > + > + In memory of Toni W. Hagan > + > + Hospice of Acadiana, Inc. > + 2600 Johnston St., Suite 200 > + Lafayette, LA 70503-3240 > + > + Phone (337) 232-1234 or 1-800-738-2226 > + Fax (337) 232-1297 > + > + http://www.hospiceacadiana.com/ > + > + Manuel > + */ > + > +/* > + Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) > +*/ > + > +#include "decompress.h" > + > +#ifndef INT_MAX > +#define INT_MAX 0x7fffffff > +#endif > + > +/* Constants for Huffman coding */ > +#define MAX_GROUPS 6 > +#define GROUP_SIZE 50 /* 64 would have been more efficient */ > +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ > +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ > +#define SYMBOL_RUNA 0 > +#define SYMBOL_RUNB 1 > + > +/* Status return values */ > +#define RETVAL_OK 0 > +#define RETVAL_LAST_BLOCK (-1) > +#define RETVAL_NOT_BZIP_DATA (-2) > +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) > +#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) > +#define RETVAL_DATA_ERROR (-5) > +#define RETVAL_OUT_OF_MEMORY (-6) > +#define RETVAL_OBSOLETE_INPUT (-7) > + > +/* Other housekeeping constants */ > +#define BZIP2_IOBUF_SIZE 4096 > + > +/* This is what we know about each Huffman coding group */ > +struct group_data { > + /* We have an extra slot at the end of limit[] for a sentinal value. 
*/ > + int limit[MAX_HUFCODE_BITS+1]; > + int base[MAX_HUFCODE_BITS]; > + int permute[MAX_SYMBOLS]; > + int minLen, maxLen; > +}; > + > +/* Structure holding all the housekeeping data, including IO buffers and > + memory that persists between calls to bunzip */ > +struct bunzip_data { > + /* State for interrupting output loop */ > + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; > + /* I/O tracking data (file handles, buffers, positions, etc.) */ > + int (*fill)(void*, unsigned int); > + int inbufCount, inbufPos /*, outbufPos*/; > + unsigned char *inbuf /*,*outbuf*/; > + unsigned int inbufBitCount, inbufBits; > + /* The CRC values stored in the block header and calculated from the > + data */ > + unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; > + /* Intermediate buffer and its size (in bytes) */ > + unsigned int *dbuf, dbufSize; > + /* These things are a bit too big to go on the stack */ > + unsigned char selectors[32768]; /* nSelectors = 15 bits */ > + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ > + int io_error; /* non-zero if we have IO error */ > +}; > + > + > +/* Return the next nnn bits of input. All reads from the compressed input > + are done through this function. All reads are big endian */ > +static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) > +{ > + unsigned int bits = 0; > + > + /* If we need to get more data from the byte buffer, do so. > + (Loop getting one byte at a time to enforce endianness and avoid > + unaligned access.) 
*/ > + while (bd->inbufBitCount < bits_wanted) { > + /* If we need to read more data from file into byte buffer, do > + so */ > + if (bd->inbufPos == bd->inbufCount) { > + if (bd->io_error) > + return 0; > + bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); > + if (bd->inbufCount <= 0) { > + bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; > + return 0; > + } > + bd->inbufPos = 0; > + } > + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ > + if (bd->inbufBitCount >= 24) { > + bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); > + bits_wanted -= bd->inbufBitCount; > + bits <<= bits_wanted; > + bd->inbufBitCount = 0; > + } > + /* Grab next 8 bits of input from buffer. */ > + bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; > + bd->inbufBitCount += 8; > + } > + /* Calculate result */ > + bd->inbufBitCount -= bits_wanted; > + bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); > + > + return bits; > +} > + > +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ > + > +static int INIT get_next_block(struct bunzip_data *bd) > +{ > + struct group_data *hufGroup = NULL; > + int *base = NULL; > + int *limit = NULL; > + int dbufCount, nextSym, dbufSize, groupCount, selector, > + i, j, k, t, runPos, symCount, symTotal, nSelectors, > + byteCount[256]; > + unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; > + unsigned int *dbuf, origPtr; > + > + dbuf = bd->dbuf; > + dbufSize = bd->dbufSize; > + selectors = bd->selectors; > + > + /* Read in header signature and CRC, then validate signature. > + (last block signature means CRC is for whole file, return now) */ > + i = get_bits(bd, 24); > + j = get_bits(bd, 24); > + bd->headerCRC = get_bits(bd, 32); > + if ((i == 0x177245) && (j == 0x385090)) > + return RETVAL_LAST_BLOCK; > + if ((i != 0x314159) || (j != 0x265359)) > + return RETVAL_NOT_BZIP_DATA; > + /* We can add support for blockRandomised if anybody complains. 
> + There was some code for this in busybox 1.0.0-pre3, but nobody ever > + noticed that it didn't actually work. */ > + if (get_bits(bd, 1)) > + return RETVAL_OBSOLETE_INPUT; > + origPtr = get_bits(bd, 24); > + if (origPtr > dbufSize) > + return RETVAL_DATA_ERROR; > + /* mapping table: if some byte values are never used (encoding things > + like ascii text), the compression code removes the gaps to have fewer > + symbols to deal with, and writes a sparse bitfield indicating which > + values were present. We make a translation table to convert the > + symbols back to the corresponding bytes. */ > + t = get_bits(bd, 16); > + symTotal = 0; > + for (i = 0; i < 16; i++) { > + if (t&(1 << (15-i))) { > + k = get_bits(bd, 16); > + for (j = 0; j < 16; j++) > + if (k&(1 << (15-j))) > + symToByte[symTotal++] = (16*i)+j; > + } > + } > + /* How many different Huffman coding groups does this block use? */ > + groupCount = get_bits(bd, 3); > + if (groupCount < 2 || groupCount > MAX_GROUPS) > + return RETVAL_DATA_ERROR; > + /* nSelectors: Every GROUP_SIZE many symbols we select a new > + Huffman coding group. Read in the group selector list, > + which is stored as MTF encoded bit runs. (MTF = Move To > + Front, as each value is used it's moved to the start of the > + list.) 
*/ > + nSelectors = get_bits(bd, 15); > + if (!nSelectors) > + return RETVAL_DATA_ERROR; > + for (i = 0; i < groupCount; i++) > + mtfSymbol[i] = i; > + for (i = 0; i < nSelectors; i++) { > + /* Get next value */ > + for (j = 0; get_bits(bd, 1); j++) > + if (j >= groupCount) > + return RETVAL_DATA_ERROR; > + /* Decode MTF to get the next selector */ > + uc = mtfSymbol[j]; > + for (; j; j--) > + mtfSymbol[j] = mtfSymbol[j-1]; > + mtfSymbol[0] = selectors[i] = uc; > + } > + /* Read the Huffman coding tables for each group, which code > + for symTotal literal symbols, plus two run symbols (RUNA, > + RUNB) */ > + symCount = symTotal+2; > + for (j = 0; j < groupCount; j++) { > + unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; > + int minLen, maxLen, pp; > + /* Read Huffman code lengths for each symbol. They're > + stored in a way similar to mtf; record a starting > + value for the first symbol, and an offset from the > + previous value for everys symbol after that. > + (Subtracting 1 before the loop and then adding it > + back at the end is an optimization that makes the > + test inside the loop simpler: symbol length 0 > + becomes negative, so an unsigned inequality catches > + it.) */ > + t = get_bits(bd, 5)-1; > + for (i = 0; i < symCount; i++) { > + for (;;) { > + if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) > + return RETVAL_DATA_ERROR; > + > + /* If first bit is 0, stop. Else > + second bit indicates whether to > + increment or decrement the value. > + Optimization: grab 2 bits and unget > + the second if the first was 0. */ > + > + k = get_bits(bd, 2); > + if (k < 2) { > + bd->inbufBitCount++; > + break; > + } > + /* Add one if second bit 1, else > + * subtract 1. 
Avoids if/else */ > + t += (((k+1)&2)-1); > + } > + /* Correct for the initial -1, to get the > + * final symbol length */ > + length[i] = t+1; > + } > + /* Find largest and smallest lengths in this group */ > + minLen = maxLen = length[0]; > + > + for (i = 1; i < symCount; i++) { > + if (length[i] > maxLen) > + maxLen = length[i]; > + else if (length[i] < minLen) > + minLen = length[i]; > + } > + > + /* Calculate permute[], base[], and limit[] tables from > + * length[]. > + * > + * permute[] is the lookup table for converting > + * Huffman coded symbols into decoded symbols. base[] > + * is the amount to subtract from the value of a > + * Huffman symbol of a given length when using > + * permute[]. > + * > + * limit[] indicates the largest numerical value a > + * symbol with a given number of bits can have. This > + * is how the Huffman codes can vary in length: each > + * code with a value > limit[length] needs another > + * bit. > + */ > + hufGroup = bd->groups+j; > + hufGroup->minLen = minLen; > + hufGroup->maxLen = maxLen; > + /* Note that minLen can't be smaller than 1, so we > + adjust the base and limit array pointers so we're > + not always wasting the first entry. We do this > + again when using them (during symbol decoding).*/ > + base = hufGroup->base-1; > + limit = hufGroup->limit-1; > + /* Calculate permute[]. Concurently, initialize > + * temp[] and limit[]. */ > + pp = 0; > + for (i = minLen; i <= maxLen; i++) { > + temp[i] = limit[i] = 0; > + for (t = 0; t < symCount; t++) > + if (length[t] == i) > + hufGroup->permute[pp++] = t; > + } > + /* Count symbols coded for at each bit length */ > + for (i = 0; i < symCount; i++) > + temp[length[i]]++; > + /* Calculate limit[] (the largest symbol-coding value > + *at each bit length, which is (previous limit << > + *1)+symbols at this level), and base[] (number of > + *symbols to ignore at each bit length, which is limit > + *minus the cumulative count of symbols coded for > + *already). 
*/ > + pp = t = 0; > + for (i = minLen; i < maxLen; i++) { > + pp += temp[i]; > + /* We read the largest possible symbol size > + and then unget bits after determining how > + many we need, and those extra bits could be > + set to anything. (They're noise from > + future symbols.) At each level we're > + really only interested in the first few > + bits, so here we set all the trailing > + to-be-ignored bits to 1 so they don't > + affect the value > limit[length] > + comparison. */ > + limit[i] = (pp << (maxLen - i)) - 1; > + pp <<= 1; > + base[i+1] = pp-(t += temp[i]); > + } > + limit[maxLen+1] = INT_MAX; /* Sentinal value for > + * reading next sym. */ > + limit[maxLen] = pp+temp[maxLen]-1; > + base[minLen] = 0; > + } > + /* We've finished reading and digesting the block header. Now > + read this block's Huffman coded symbols from the file and > + undo the Huffman coding and run length encoding, saving the > + result into dbuf[dbufCount++] = uc */ > + > + /* Initialize symbol occurrence counters and symbol Move To > + * Front table */ > + for (i = 0; i < 256; i++) { > + byteCount[i] = 0; > + mtfSymbol[i] = (unsigned char)i; > + } > + /* Loop through compressed symbols. */ > + runPos = dbufCount = symCount = selector = 0; > + for (;;) { > + /* Determine which Huffman coding group to use. */ > + if (!(symCount--)) { > + symCount = GROUP_SIZE-1; > + if (selector >= nSelectors) > + return RETVAL_DATA_ERROR; > + hufGroup = bd->groups+selectors[selector++]; > + base = hufGroup->base-1; > + limit = hufGroup->limit-1; > + } > + /* Read next Huffman-coded symbol. */ > + /* Note: It is far cheaper to read maxLen bits and > + back up than it is to read minLen bits and then an > + additional bit at a time, testing as we go. > + Because there is a trailing last block (with file > + CRC), there is no danger of the overread causing an > + unexpected EOF for a valid compressed file. 
As a > + further optimization, we do the read inline > + (falling back to a call to get_bits if the buffer > + runs dry). The following (up to got_huff_bits:) is > + equivalent to j = get_bits(bd, hufGroup->maxLen); > + */ > + while (bd->inbufBitCount < hufGroup->maxLen) { > + if (bd->inbufPos == bd->inbufCount) { > + j = get_bits(bd, hufGroup->maxLen); > + goto got_huff_bits; > + } > + bd->inbufBits = > + (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; > + bd->inbufBitCount += 8; > + }; > + bd->inbufBitCount -= hufGroup->maxLen; > + j = (bd->inbufBits >> bd->inbufBitCount)& > + ((1 << hufGroup->maxLen)-1); > +got_huff_bits: > + /* Figure how how many bits are in next symbol and > + * unget extras */ > + i = hufGroup->minLen; > + while (j > limit[i]) > + ++i; > + bd->inbufBitCount += (hufGroup->maxLen - i); > + /* Huffman decode value to get nextSym (with bounds checking) */ > + if ((i > hufGroup->maxLen) > + || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) > + >= MAX_SYMBOLS)) > + return RETVAL_DATA_ERROR; > + nextSym = hufGroup->permute[j]; > + /* We have now decoded the symbol, which indicates > + either a new literal byte, or a repeated run of the > + most recent literal byte. First, check if nextSym > + indicates a repeated run, and if so loop collecting > + how many times to repeat the last literal. */ > + if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ > + /* If this is the start of a new run, zero out > + * counter */ > + if (!runPos) { > + runPos = 1; > + t = 0; > + } > + /* Neat trick that saves 1 symbol: instead of > + or-ing 0 or 1 at each bit position, add 1 > + or 2 instead. For example, 1011 is 1 << 0 > + + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 > + + 1 << 2. You can make any bit pattern > + that way using 1 less symbol than the basic > + or 0/1 method (except all bits 0, which > + would use no symbols, but a run of length 0 > + doesn't mean anything in this context). > + Thus space is saved. 
*/ > + t += (runPos << nextSym); > + /* +runPos if RUNA; +2*runPos if RUNB */ > + > + runPos <<= 1; > + continue; > + } > + /* When we hit the first non-run symbol after a run, > + we now know how many times to repeat the last > + literal, so append that many copies to our buffer > + of decoded symbols (dbuf) now. (The last literal > + used is the one at the head of the mtfSymbol > + array.) */ > + if (runPos) { > + runPos = 0; > + if (dbufCount+t >= dbufSize) > + return RETVAL_DATA_ERROR; > + > + uc = symToByte[mtfSymbol[0]]; > + byteCount[uc] += t; > + while (t--) > + dbuf[dbufCount++] = uc; > + } > + /* Is this the terminating symbol? */ > + if (nextSym > symTotal) > + break; > + /* At this point, nextSym indicates a new literal > + character. Subtract one to get the position in the > + MTF array at which this literal is currently to be > + found. (Note that the result can''t be -1 or 0, > + because 0 and 1 are RUNA and RUNB. But another > + instance of the first symbol in the mtf array, > + position 0, would have been handled as part of a > + run above. Therefore 1 unused mtf position minus 2 > + non-literal nextSym values equals -1.) */ > + if (dbufCount >= dbufSize) > + return RETVAL_DATA_ERROR; > + i = nextSym - 1; > + uc = mtfSymbol[i]; > + /* Adjust the MTF array. Since we typically expect to > + *move only a small number of symbols, and are bound > + *by 256 in any case, using memmove here would > + *typically be bigger and slower due to function call > + *overhead and other assorted setup costs. */ > + do { > + mtfSymbol[i] = mtfSymbol[i-1]; > + } while (--i); > + mtfSymbol[0] = uc; > + uc = symToByte[uc]; > + /* We have our literal byte. Save it into dbuf. */ > + byteCount[uc]++; > + dbuf[dbufCount++] = (unsigned int)uc; > + } > + /* At this point, we''ve read all the Huffman-coded symbols > + (and repeated runs) for this block from the input stream, > + and decoded them into the intermediate buffer. There are > + dbufCount many decoded bytes in dbuf[]. 
Now undo the > + Burrows-Wheeler transform on dbuf. See > + http://dogma.net/markn/articles/bwt/bwt.htm > + */ > + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ > + j = 0; > + for (i = 0; i < 256; i++) { > + k = j+byteCount[i]; > + byteCount[i] = j; > + j = k; > + } > + /* Figure out what order dbuf would be in if we sorted it. */ > + for (i = 0; i < dbufCount; i++) { > + uc = (unsigned char)(dbuf[i] & 0xff); > + dbuf[byteCount[uc]] |= (i << 8); > + byteCount[uc]++; > + } > + /* Decode first byte by hand to initialize "previous" byte. > + Note that it doesn''t get output, and if the first three > + characters are identical it doesn''t qualify as a run (hence > + writeRunCountdown = 5). */ > + if (dbufCount) { > + if (origPtr >= dbufCount) > + return RETVAL_DATA_ERROR; > + bd->writePos = dbuf[origPtr]; > + bd->writeCurrent = (unsigned char)(bd->writePos&0xff); > + bd->writePos >>= 8; > + bd->writeRunCountdown = 5; > + } > + bd->writeCount = dbufCount; > + > + return RETVAL_OK; > +} > + > +/* Undo burrows-wheeler transform on intermediate buffer to produce output. > + If start_bunzip was initialized with out_fd =-1, then up to len bytes of > + data are written to outbuf. Return value is number of bytes written or > + error (all errors are negative numbers). If out_fd!=-1, outbuf and len > + are ignored, data is written to out_fd and return is RETVAL_OK or error. 
> +*/ > + > +static int INIT read_bunzip(struct bunzip_data *bd, unsigned char *outbuf, int len) > +{ > + const unsigned int *dbuf; > + int pos, xcurrent, previous, gotcount; > + > + /* If last read was short due to end of file, return last block now */ > + if (bd->writeCount < 0) > + return bd->writeCount; > + > + gotcount = 0; > + dbuf = bd->dbuf; > + pos = bd->writePos; > + xcurrent = bd->writeCurrent; > + > + /* We will always have pending decoded data to write into the output > + buffer unless this is the very first call (in which case we haven''t > + Huffman-decoded a block into the intermediate buffer yet). */ > + > + if (bd->writeCopies) { > + /* Inside the loop, writeCopies means extra copies (beyond 1) */ > + --bd->writeCopies; > + /* Loop outputting bytes */ > + for (;;) { > + /* If the output buffer is full, snapshot > + * state and return */ > + if (gotcount >= len) { > + bd->writePos = pos; > + bd->writeCurrent = xcurrent; > + bd->writeCopies++; > + return len; > + } > + /* Write next byte into output buffer, updating CRC */ > + outbuf[gotcount++] = xcurrent; > + bd->writeCRC = (((bd->writeCRC) << 8) > + ^bd->crc32Table[((bd->writeCRC) >> 24) > + ^xcurrent]); > + /* Loop now if we''re outputting multiple > + * copies of this byte */ > + if (bd->writeCopies) { > + --bd->writeCopies; > + continue; > + } > +decode_next_byte: > + if (!bd->writeCount--) > + break; > + /* Follow sequence vector to undo > + * Burrows-Wheeler transform */ > + previous = xcurrent; > + pos = dbuf[pos]; > + xcurrent = pos&0xff; > + pos >>= 8; > + /* After 3 consecutive copies of the same > + byte, the 4th is a repeat count. 
We count > + down from 4 instead *of counting up because > + testing for non-zero is faster */ > + if (--bd->writeRunCountdown) { > + if (xcurrent != previous) > + bd->writeRunCountdown = 4; > + } else { > + /* We have a repeated run, this byte > + * indicates the count */ > + bd->writeCopies = xcurrent; > + xcurrent = previous; > + bd->writeRunCountdown = 5; > + /* Sometimes there are just 3 bytes > + * (run length 0) */ > + if (!bd->writeCopies) > + goto decode_next_byte; > + /* Subtract the 1 copy we''d output > + * anyway to get extras */ > + --bd->writeCopies; > + } > + } > + /* Decompression of this block completed successfully */ > + bd->writeCRC = ~bd->writeCRC; > + bd->totalCRC = ((bd->totalCRC << 1) | > + (bd->totalCRC >> 31)) ^ bd->writeCRC; > + /* If this block had a CRC error, force file level CRC error. */ > + if (bd->writeCRC != bd->headerCRC) { > + bd->totalCRC = bd->headerCRC+1; > + return RETVAL_LAST_BLOCK; > + } > + } > + > + /* Refill the intermediate buffer by Huffman-decoding next > + * block of input */ > + /* (previous is just a convenient unused temp variable here) */ > + previous = get_next_block(bd); > + if (previous) { > + bd->writeCount = previous; > + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; > + } > + bd->writeCRC = 0xffffffffUL; > + pos = bd->writePos; > + xcurrent = bd->writeCurrent; > + goto decode_next_byte; > +} > + > +static int INIT nofill(void *buf, unsigned int len) > +{ > + return -1; > +} > + > +/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain > + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are > + ignored, and data is read from file handle into temporary buffer. 
*/ > +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, > + int (*fill)(void*, unsigned int)) > +{ > + struct bunzip_data *bd; > + unsigned int i, j, c; > + const unsigned int BZh0 > + (((unsigned int)''B'') << 24)+(((unsigned int)''Z'') << 16) > + +(((unsigned int)''h'') << 8)+(unsigned int)''0''; > + > + /* Figure out how much data to allocate */ > + i = sizeof(struct bunzip_data); > + > + /* Allocate bunzip_data. Most fields initialize to zero. */ > + bd = *bdp = malloc(i); > + memset(bd, 0, sizeof(struct bunzip_data)); > + /* Setup input buffer */ > + bd->inbuf = inbuf; > + bd->inbufCount = len; > + if (fill != NULL) > + bd->fill = fill; > + else > + bd->fill = nofill; > + > + /* Init the CRC32 table (big endian) */ > + for (i = 0; i < 256; i++) { > + c = i << 24; > + for (j = 8; j; j--) > + c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); > + bd->crc32Table[i] = c; > + } > + > + /* Ensure that file starts with "BZh[''1''-''9'']." */ > + i = get_bits(bd, 32); > + if (((unsigned int)(i-BZh0-1)) >= 9) > + return RETVAL_NOT_BZIP_DATA; > + > + /* Fourth byte (ascii ''1''-''9''), indicates block size in units of 100k of > + uncompressed data. Allocate intermediate buffer for block. */ > + bd->dbufSize = 100000*(i-BZh0); > + > + bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); > + return RETVAL_OK; > +} > + > +/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, > + not end of file.) 
*/ > +STATIC int INIT bunzip2(unsigned char *buf, unsigned int len, > + int(*fill)(void*, unsigned int), > + int(*flush)(void*, unsigned int), > + unsigned char *outbuf, > + unsigned int *pos, > + void(*error_fn)(const char *x)) > +{ > + struct bunzip_data *bd; > + int i = -1; > + unsigned char *inbuf; > + > + set_error_fn(error_fn); > + if (flush) > + outbuf = malloc(BZIP2_IOBUF_SIZE); > + > + if (!outbuf) { > + error("Could not allocate output bufer"); > + return -1; > + } > + if (buf) > + inbuf = buf; > + else > + inbuf = malloc(BZIP2_IOBUF_SIZE); > + if (!inbuf) { > + error("Could not allocate input bufer"); > + goto exit_0; > + } > + i = start_bunzip(&bd, inbuf, len, fill); > + if (!i) { > + for (;;) { > + i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); > + if (i <= 0) > + break; > + if (!flush) > + outbuf += i; > + else > + if (i != flush(outbuf, i)) { > + i = RETVAL_UNEXPECTED_OUTPUT_EOF; > + break; > + } > + } > + } > + /* Check CRC and release memory */ > + if (i == RETVAL_LAST_BLOCK) { > + if (bd->headerCRC != bd->totalCRC) > + error("Data integrity error when decompressing."); > + else > + i = RETVAL_OK; > + } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { > + error("Compressed file ends unexpectedly"); > + } > + if (bd->dbuf) > + large_free(bd->dbuf); > + if (pos) > + *pos = bd->inbufPos; > + free(bd); > + if (!buf) > + free(inbuf); > +exit_0: > + if (flush) > + free(outbuf); > + return i; > +} > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ 2009-10-27/xen/common/decompress.c 2009-11-05 12:27:10.000000000 +0100 > @@ -0,0 +1,27 @@ > +#include <xen/config.h> > +#include <xen/init.h> > +#include <xen/lib.h> > +#include <xen/string.h> > +#include <xen/decompress.h> > + > +static void __init error(const char *msg) > +{ > + printk("%s\n", msg); > +} > + > +int __init decompress(void *inbuf, unsigned int len, void *outbuf) > +{ > +#if 0 /* Not needed here yet. 
*/ > + if ( len >= 2 && > + (!memcmp(inbuf, "\037\213", 2) || !memcmp(inbuf, "\037\236", 2)) ) > + return gunzip(inbuf, len, NULL, NULL, outbuf, NULL, error); > +#endif > + > + if ( len >= 3 && !memcmp(inbuf, "\x42\x5a\x68", 3) ) > + return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error); > + > + if ( len >= 2 && !memcmp(inbuf, "\135\000", 2) ) > + return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error); > + > + return 1; > +} > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ 2009-10-27/xen/common/decompress.h 2009-11-05 15:21:52.000000000 +0100 > @@ -0,0 +1,19 @@ > +#include <xen/config.h> > +#include <xen/cache.h> > +#include <xen/decompress.h> > +#include <xen/init.h> > +#include <xen/string.h> > +#include <xen/types.h> > +#include <xen/xmalloc.h> > + > +#define STATIC > +#define INIT __init > + > +static void(*__initdata error)(const char *); > +#define set_error_fn(x) error = x; > + > +#define malloc xmalloc_bytes > +#define free xfree > + > +#define large_malloc xmalloc_bytes > +#define large_free xfree > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ 2009-10-27/xen/common/unlzma.c 2009-11-05 12:45:37.000000000 +0100 > @@ -0,0 +1,647 @@ > +/* Lzma decompressor for Linux kernel. Shamelessly snarfed > + * from busybox 1.1.1 > + * > + * Linux kernel adaptation > + * Copyright (C) 2006 Alain < alain@knaff.lu > > + * > + * Based on small lzma deflate implementation/Small range coder > + * implementation for lzma. > + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > > + * > + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) > + * Copyright (C) 1999-2005 Igor Pavlov > + * > + * Copyrights of the parts, see headers below. > + * > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "decompress.h" > + > +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) > + > +static long long INIT read_int(unsigned char *ptr, int size) > +{ > + int i; > + long long ret = 0; > + > + for (i = 0; i < size; i++) > + ret = (ret << 8) | ptr[size-i-1]; > + return ret; > +} > + > +#define ENDIAN_CONVERT(x) \ > + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) > + > + > +/* Small range coder implementation for lzma. > + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > > + * > + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) > + * Copyright (c) 1999-2005 Igor Pavlov > + */ > + > +#include <xen/compiler.h> > + > +#define LZMA_IOBUF_SIZE 0x10000 > + > +struct rc { > + int (*fill)(void*, unsigned int); > + uint8_t *ptr; > + uint8_t *buffer; > + uint8_t *buffer_end; > + int buffer_size; > + uint32_t code; > + uint32_t range; > + uint32_t bound; > +}; > + > + > +#define RC_TOP_BITS 24 > +#define RC_MOVE_BITS 5 > +#define RC_MODEL_TOTAL_BITS 11 > + > + > +static int nofill(void *buffer, unsigned int len) > +{ > + return -1; > +} > + > +/* Called twice: once at startup and once in rc_normalize() */ > +static void INIT rc_read(struct rc *rc) > +{ > + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); > + if (rc->buffer_size <= 0) > + error("unexpected EOF"); > + rc->ptr = rc->buffer; > + rc->buffer_end = rc->buffer + rc->buffer_size; > +} > + > +/* Called once */ > +static inline void INIT rc_init(struct rc *rc, > + 
int (*fill)(void*, unsigned int), > + unsigned char *buffer, int buffer_size) > +{ > + if (fill) > + rc->fill = fill; > + else > + rc->fill = nofill; > + rc->buffer = (uint8_t *)buffer; > + rc->buffer_size = buffer_size; > + rc->buffer_end = rc->buffer + rc->buffer_size; > + rc->ptr = rc->buffer; > + > + rc->code = 0; > + rc->range = 0xFFFFFFFF; > +} > + > +static inline void INIT rc_init_code(struct rc *rc) > +{ > + int i; > + > + for (i = 0; i < 5; i++) { > + if (rc->ptr >= rc->buffer_end) > + rc_read(rc); > + rc->code = (rc->code << 8) | *rc->ptr++; > + } > +} > + > + > +/* Called once. TODO: bb_maybe_free() */ > +static inline void INIT rc_free(struct rc *rc) > +{ > + free(rc->buffer); > +} > + > +/* Called twice, but one callsite is in inline''d rc_is_bit_0_helper() */ > +static void INIT rc_do_normalize(struct rc *rc) > +{ > + if (rc->ptr >= rc->buffer_end) > + rc_read(rc); > + rc->range <<= 8; > + rc->code = (rc->code << 8) | *rc->ptr++; > +} > +static inline void INIT rc_normalize(struct rc *rc) > +{ > + if (rc->range < (1 << RC_TOP_BITS)) > + rc_do_normalize(rc); > +} > + > +/* Called 9 times */ > +/* Why rc_is_bit_0_helper exists? 
> + *Because we want to always expose (rc->code < rc->bound) to optimizer > + */ > +static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) > +{ > + rc_normalize(rc); > + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); > + return rc->bound; > +} > +static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) > +{ > + uint32_t t = rc_is_bit_0_helper(rc, p); > + return rc->code < t; > +} > + > +/* Called ~10 times, but very small, thus inlined */ > +static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) > +{ > + rc->range = rc->bound; > + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; > +} > +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) > +{ > + rc->range -= rc->bound; > + rc->code -= rc->bound; > + *p -= *p >> RC_MOVE_BITS; > +} > + > +/* Called 4 times in unlzma loop */ > +static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) > +{ > + if (rc_is_bit_0(rc, p)) { > + rc_update_bit_0(rc, p); > + *symbol *= 2; > + return 0; > + } else { > + rc_update_bit_1(rc, p); > + *symbol = *symbol * 2 + 1; > + return 1; > + } > +} > + > +/* Called once */ > +static inline int INIT rc_direct_bit(struct rc *rc) > +{ > + rc_normalize(rc); > + rc->range >>= 1; > + if (rc->code >= rc->range) { > + rc->code -= rc->range; > + return 1; > + } > + return 0; > +} > + > +/* Called twice */ > +static inline void INIT > +rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) > +{ > + int i = num_levels; > + > + *symbol = 1; > + while (i--) > + rc_get_bit(rc, p + *symbol, symbol); > + *symbol -= 1 << num_levels; > +} > + > + > +/* > + * Small lzma deflate implementation. 
> + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > > + * > + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) > + * Copyright (C) 1999-2005 Igor Pavlov > + */ > + > + > +struct lzma_header { > + uint8_t pos; > + uint32_t dict_size; > + uint64_t dst_size; > +} __attribute__ ((packed)) ; > + > + > +#define LZMA_BASE_SIZE 1846 > +#define LZMA_LIT_SIZE 768 > + > +#define LZMA_NUM_POS_BITS_MAX 4 > + > +#define LZMA_LEN_NUM_LOW_BITS 3 > +#define LZMA_LEN_NUM_MID_BITS 3 > +#define LZMA_LEN_NUM_HIGH_BITS 8 > + > +#define LZMA_LEN_CHOICE 0 > +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) > +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) > +#define LZMA_LEN_MID (LZMA_LEN_LOW \ > + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) > +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ > + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) > +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) > + > +#define LZMA_NUM_STATES 12 > +#define LZMA_NUM_LIT_STATES 7 > + > +#define LZMA_START_POS_MODEL_INDEX 4 > +#define LZMA_END_POS_MODEL_INDEX 14 > +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) > + > +#define LZMA_NUM_POS_SLOT_BITS 6 > +#define LZMA_NUM_LEN_TO_POS_STATES 4 > + > +#define LZMA_NUM_ALIGN_BITS 4 > + > +#define LZMA_MATCH_MIN_LEN 2 > + > +#define LZMA_IS_MATCH 0 > +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) > +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) > +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) > +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) > +#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) > +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ > + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) > +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ > + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) > +#define LZMA_ALIGN (LZMA_SPEC_POS \ > + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) > +#define 
LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) > +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) > +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) > + > + > +struct writer { > + uint8_t *buffer; > + uint8_t previous_byte; > + size_t buffer_pos; > + int bufsize; > + size_t global_pos; > + int(*flush)(void*, unsigned int); > + struct lzma_header *header; > +}; > + > +struct cstate { > + int state; > + uint32_t rep0, rep1, rep2, rep3; > +}; > + > +static inline size_t INIT get_pos(struct writer *wr) > +{ > + return > + wr->global_pos + wr->buffer_pos; > +} > + > +static inline uint8_t INIT peek_old_byte(struct writer *wr, > + uint32_t offs) > +{ > + if (!wr->flush) { > + int32_t pos; > + while (offs > wr->header->dict_size) > + offs -= wr->header->dict_size; > + pos = wr->buffer_pos - offs; > + return wr->buffer[pos]; > + } else { > + uint32_t pos = wr->buffer_pos - offs; > + while (pos >= wr->header->dict_size) > + pos += wr->header->dict_size; > + return wr->buffer[pos]; > + } > + > +} > + > +static inline void INIT write_byte(struct writer *wr, uint8_t byte) > +{ > + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; > + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { > + wr->buffer_pos = 0; > + wr->global_pos += wr->header->dict_size; > + wr->flush((char *)wr->buffer, wr->header->dict_size); > + } > +} > + > + > +static inline void INIT copy_byte(struct writer *wr, uint32_t offs) > +{ > + write_byte(wr, peek_old_byte(wr, offs)); > +} > + > +static inline void INIT copy_bytes(struct writer *wr, > + uint32_t rep0, int len) > +{ > + do { > + copy_byte(wr, rep0); > + len--; > + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); > +} > + > +static inline void INIT process_bit0(struct writer *wr, struct rc *rc, > + struct cstate *cst, uint16_t *p, > + int pos_state, uint16_t *prob, > + int lc, uint32_t literal_pos_mask) { > + int mi = 1; > + rc_update_bit_0(rc, prob); > + prob = (p + LZMA_LITERAL + 
> + (LZMA_LIT_SIZE > + * (((get_pos(wr) & literal_pos_mask) << lc) > + + (wr->previous_byte >> (8 - lc)))) > + ); > + > + if (cst->state >= LZMA_NUM_LIT_STATES) { > + int match_byte = peek_old_byte(wr, cst->rep0); > + do { > + int bit; > + uint16_t *prob_lit; > + > + match_byte <<= 1; > + bit = match_byte & 0x100; > + prob_lit = prob + 0x100 + bit + mi; > + if (rc_get_bit(rc, prob_lit, &mi)) { > + if (!bit) > + break; > + } else { > + if (bit) > + break; > + } > + } while (mi < 0x100); > + } > + while (mi < 0x100) { > + uint16_t *prob_lit = prob + mi; > + rc_get_bit(rc, prob_lit, &mi); > + } > + write_byte(wr, mi); > + if (cst->state < 4) > + cst->state = 0; > + else if (cst->state < 10) > + cst->state -= 3; > + else > + cst->state -= 6; > +} > + > +static inline void INIT process_bit1(struct writer *wr, struct rc *rc, > + struct cstate *cst, uint16_t *p, > + int pos_state, uint16_t *prob) { > + int offset; > + uint16_t *prob_len; > + int num_bits; > + int len; > + > + rc_update_bit_1(rc, prob); > + prob = p + LZMA_IS_REP + cst->state; > + if (rc_is_bit_0(rc, prob)) { > + rc_update_bit_0(rc, prob); > + cst->rep3 = cst->rep2; > + cst->rep2 = cst->rep1; > + cst->rep1 = cst->rep0; > + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; > + prob = p + LZMA_LEN_CODER; > + } else { > + rc_update_bit_1(rc, prob); > + prob = p + LZMA_IS_REP_G0 + cst->state; > + if (rc_is_bit_0(rc, prob)) { > + rc_update_bit_0(rc, prob); > + prob = (p + LZMA_IS_REP_0_LONG > + + (cst->state << > + LZMA_NUM_POS_BITS_MAX) + > + pos_state); > + if (rc_is_bit_0(rc, prob)) { > + rc_update_bit_0(rc, prob); > + > + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 
> + 9 : 11; > + copy_byte(wr, cst->rep0); > + return; > + } else { > + rc_update_bit_1(rc, prob); > + } > + } else { > + uint32_t distance; > + > + rc_update_bit_1(rc, prob); > + prob = p + LZMA_IS_REP_G1 + cst->state; > + if (rc_is_bit_0(rc, prob)) { > + rc_update_bit_0(rc, prob); > + distance = cst->rep1; > + } else { > + rc_update_bit_1(rc, prob); > + prob = p + LZMA_IS_REP_G2 + cst->state; > + if (rc_is_bit_0(rc, prob)) { > + rc_update_bit_0(rc, prob); > + distance = cst->rep2; > + } else { > + rc_update_bit_1(rc, prob); > + distance = cst->rep3; > + cst->rep3 = cst->rep2; > + } > + cst->rep2 = cst->rep1; > + } > + cst->rep1 = cst->rep0; > + cst->rep0 = distance; > + } > + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; > + prob = p + LZMA_REP_LEN_CODER; > + } > + > + prob_len = prob + LZMA_LEN_CHOICE; > + if (rc_is_bit_0(rc, prob_len)) { > + rc_update_bit_0(rc, prob_len); > + prob_len = (prob + LZMA_LEN_LOW > + + (pos_state << > + LZMA_LEN_NUM_LOW_BITS)); > + offset = 0; > + num_bits = LZMA_LEN_NUM_LOW_BITS; > + } else { > + rc_update_bit_1(rc, prob_len); > + prob_len = prob + LZMA_LEN_CHOICE_2; > + if (rc_is_bit_0(rc, prob_len)) { > + rc_update_bit_0(rc, prob_len); > + prob_len = (prob + LZMA_LEN_MID > + + (pos_state << > + LZMA_LEN_NUM_MID_BITS)); > + offset = 1 << LZMA_LEN_NUM_LOW_BITS; > + num_bits = LZMA_LEN_NUM_MID_BITS; > + } else { > + rc_update_bit_1(rc, prob_len); > + prob_len = prob + LZMA_LEN_HIGH; > + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) > + + (1 << LZMA_LEN_NUM_MID_BITS)); > + num_bits = LZMA_LEN_NUM_HIGH_BITS; > + } > + } > + > + rc_bit_tree_decode(rc, prob_len, num_bits, &len); > + len += offset; > + > + if (cst->state < 4) { > + int pos_slot; > + > + cst->state += LZMA_NUM_LIT_STATES; > + prob > + p + LZMA_POS_SLOT + > + ((len < > + LZMA_NUM_LEN_TO_POS_STATES ? 
len : > + LZMA_NUM_LEN_TO_POS_STATES - 1) > + << LZMA_NUM_POS_SLOT_BITS); > + rc_bit_tree_decode(rc, prob, > + LZMA_NUM_POS_SLOT_BITS, > + &pos_slot); > + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { > + int i, mi; > + num_bits = (pos_slot >> 1) - 1; > + cst->rep0 = 2 | (pos_slot & 1); > + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { > + cst->rep0 <<= num_bits; > + prob = p + LZMA_SPEC_POS + > + cst->rep0 - pos_slot - 1; > + } else { > + num_bits -= LZMA_NUM_ALIGN_BITS; > + while (num_bits--) > + cst->rep0 = (cst->rep0 << 1) | > + rc_direct_bit(rc); > + prob = p + LZMA_ALIGN; > + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; > + num_bits = LZMA_NUM_ALIGN_BITS; > + } > + i = 1; > + mi = 1; > + while (num_bits--) { > + if (rc_get_bit(rc, prob + mi, &mi)) > + cst->rep0 |= i; > + i <<= 1; > + } > + } else > + cst->rep0 = pos_slot; > + if (++(cst->rep0) == 0) > + return; > + } > + > + len += LZMA_MATCH_MIN_LEN; > + > + copy_bytes(wr, cst->rep0, len); > +} > + > + > + > +STATIC inline int INIT unlzma(unsigned char *buf, unsigned int in_len, > + int(*fill)(void*, unsigned int), > + int(*flush)(void*, unsigned int), > + unsigned char *output, > + unsigned int *posp, > + void(*error_fn)(const char *x) > + ) > +{ > + struct lzma_header header; > + int lc, pb, lp; > + uint32_t pos_state_mask; > + uint32_t literal_pos_mask; > + uint16_t *p; > + int num_probs; > + struct rc rc; > + int i, mi; > + struct writer wr; > + struct cstate cst; > + unsigned char *inbuf; > + int ret = -1; > + > + set_error_fn(error_fn); > + > + if (buf) > + inbuf = buf; > + else > + inbuf = malloc(LZMA_IOBUF_SIZE); > + if (!inbuf) { > + error("Could not allocate input bufer"); > + goto exit_0; > + } > + > + cst.state = 0; > + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; > + > + wr.header = &header; > + wr.flush = flush; > + wr.global_pos = 0; > + wr.previous_byte = 0; > + wr.buffer_pos = 0; > + > + rc_init(&rc, fill, inbuf, in_len); > + > + for (i = 0; i < sizeof(header); i++) { > + if (rc.ptr >= rc.buffer_end) > 
+ rc_read(&rc); > + ((unsigned char *)&header)[i] = *rc.ptr++; > + } > + > + if (header.pos >= (9 * 5 * 5)) > + error("bad header"); > + > + mi = 0; > + lc = header.pos; > + while (lc >= 9) { > + mi++; > + lc -= 9; > + } > + pb = 0; > + lp = mi; > + while (lp >= 5) { > + pb++; > + lp -= 5; > + } > + pos_state_mask = (1 << pb) - 1; > + literal_pos_mask = (1 << lp) - 1; > + > + ENDIAN_CONVERT(header.dict_size); > + ENDIAN_CONVERT(header.dst_size); > + > + if (header.dict_size == 0) > + header.dict_size = 1; > + > + if (output) > + wr.buffer = output; > + else { > + wr.bufsize = MIN(header.dst_size, header.dict_size); > + wr.buffer = large_malloc(wr.bufsize); > + } > + if (wr.buffer == NULL) > + goto exit_1; > + > + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); > + p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); > + if (p == 0) > + goto exit_2; > + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); > + for (i = 0; i < num_probs; i++) > + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; > + > + rc_init_code(&rc); > + > + while (get_pos(&wr) < header.dst_size) { > + int pos_state = get_pos(&wr) & pos_state_mask; > + uint16_t *prob = p + LZMA_IS_MATCH + > + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; > + if (rc_is_bit_0(&rc, prob)) > + process_bit0(&wr, &rc, &cst, p, pos_state, prob, > + lc, literal_pos_mask); > + else { > + process_bit1(&wr, &rc, &cst, p, pos_state, prob); > + if (cst.rep0 == 0) > + break; > + } > + } > + > + if (posp) > + *posp = rc.ptr-rc.buffer; > + if (wr.flush) > + wr.flush(wr.buffer, wr.buffer_pos); > + ret = 0; > + large_free(p); > +exit_2: > + if (!output) > + large_free(wr.buffer); > +exit_1: > + if (!buf) > + free(inbuf); > +exit_0: > + return ret; > +} > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ 2009-10-27/xen/include/xen/decompress.h 2009-11-05 12:27:44.000000000 +0100 > @@ -0,0 +1,38 @@ > +#ifndef __XEN_GENERIC_H > +#define __XEN_GENERIC_H > + > +typedef int decompress_fn(unsigned char *inbuf, unsigned 
int len, > + int (*fill)(void*, unsigned int), > + int (*flush)(void*, unsigned int), > + unsigned char *outbuf, unsigned int *posp, > + void (*error)(const char *x)); > + > +/* inbuf - input buffer > + * len - len of pre-read data in inbuf > + * fill - function to fill inbuf when empty > + * flush - function to write out outbuf > + * outbuf - output buffer > + * posp - if non-null, input position (number of bytes read) will be > + * returned here > + * error - error reporting function > + * > + * If len != 0, inbuf should contain all the necessary input data, and fill > + * should be NULL > + * If len = 0, inbuf can be NULL, in which case the decompressor will allocate > + * the input buffer. If inbuf != NULL it must be at least XXX_IOBUF_SIZE bytes. > + * fill will be called (repeatedly...) to read data, at most XXX_IOBUF_SIZE > + * bytes should be read per call. Replace XXX with the appropriate decompressor > + * name, i.e. LZMA_IOBUF_SIZE. > + * > + * If flush = NULL, outbuf must be large enough to buffer all the expected > + * output. If flush != NULL, the output buffer will be allocated by the > + * decompressor (outbuf = NULL), and the flush function will be called to > + * flush the output buffer at the appropriate time (decompressor and stream > + * dependent). > + */ > + > +decompress_fn bunzip2, unlzma; > + > +int decompress(void *inbuf, unsigned int len, void *outbuf); > + > +#endif >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Nov-06 09:02 UTC
Re: [Xen-devel] [PATCH] x86/dom0: support bzip2 and lzma compressed bzImage payloads
>>> Ian Campbell <Ian.Campbell@citrix.com> 06.11.09 09:56 >>>
>On Thu, 2009-11-05 at 14:24 +0000, Jan Beulich wrote:
>> This matches functionality in the tools already supporting the same for
>> DomU-s.
>>
>> Code taken from Linux 2.6.32-rc and adjusted as little as possible to
>> be usable in Xen.
>
>Looks good to me. The new files FOO.c are based on lib/decompress_FOO.c
>from Linux? Might be worth a comment in at least the changelog to aid

Yes.

>the next person to come along who wants to resync them. (and why rename
>them?)

The decompress_* prefix seemed pointless to me.

>> The question is whether, particularly for non-Linux Dom0-s, plain ELF
>> images compressed by bzip2 or lzma should also be supported.
>
>It should be fairly low cost to do so, so why not?
>
>Is there a reason to treat gzip differently to bzip2/lzma? i.e. why not
>route it through decompress.c as well? Perhaps that would involve
>updating xen/common/inflate.c but that is already just a fork of what is
>now in lib/decompress_inflate.c in Linux so it should be pretty trivial.

Yes, I had considered that, but didn't want the patch to become bigger
than necessary. I'm certainly planning on cleaning this up as a subsequent
(though lower priority) patch.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Ian Campbell
2009-Nov-06 09:11 UTC
Re: [Xen-devel] [PATCH] x86/dom0: support bzip2 and lzma compressed bzImage payloads
On Fri, 2009-11-06 at 09:02 +0000, Jan Beulich wrote:
> >>> Ian Campbell <Ian.Campbell@citrix.com> 06.11.09 09:56 >>>
> >On Thu, 2009-11-05 at 14:24 +0000, Jan Beulich wrote:
> >the next person to come along who wants to resync them. (and why rename
> >them?)
>
> The decompress_* prefix seemed pointless to me.

I guess it just highlights all those files as being part of the same
"subsystem".

> >> The question is whether, particularly for non-Linux Dom0-s, plain ELF
> >> images compressed by bzip2 or lzma should also be supported.
> >
> >It should be fairly low cost to do so, so why not?
> >
> >Is there a reason to treat gzip differently to bzip2/lzma? i.e. why not
> >route it through decompress.c as well? Perhaps that would involve
> >updating xen/common/inflate.c but that is already just a fork of what is
> >now in lib/decompress_inflate.c in Linux so it should be pretty trivial.
>
> Yes, I had considered that, but didn't want the patch to become bigger
> than necessary. I'm certainly planning on cleaning this up as a subsequent
> (though lower priority) patch.

Great, thanks!

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel