linux/lib/decompress_bunzip2.c
<<
>>
Prefs
   1/* vi: set sw = 4 ts = 4: */
   2/*      Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
   3
   4        Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
   5        which also acknowledges contributions by Mike Burrows, David Wheeler,
   6        Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
   7        Robert Sedgewick, and Jon L. Bentley.
   8
   9        This code is licensed under the LGPLv2:
  10                LGPL (http://www.gnu.org/copyleft/lgpl.html
  11*/
  12
  13/*
  14        Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
  15
  16        More efficient reading of Huffman codes, a streamlined read_bunzip()
  17        function, and various other tweaks.  In (limited) tests, approximately
  18        20% faster than bzcat on x86 and about 10% faster on arm.
  19
  20        Note that about 2/3 of the time is spent in read_unzip() reversing
  21        the Burrows-Wheeler transformation.  Much of that time is delay
  22        resulting from cache misses.
  23
  24        I would ask that anyone benefiting from this work, especially those
  25        using it in commercial products, consider making a donation to my local
  26        non-profit hospice organization in the name of the woman I loved, who
  27        passed away Feb. 12, 2003.
  28
  29                In memory of Toni W. Hagan
  30
  31                Hospice of Acadiana, Inc.
  32                2600 Johnston St., Suite 200
  33                Lafayette, LA 70503-3240
  34
  35                Phone (337) 232-1234 or 1-800-738-2226
  36                Fax   (337) 232-1297
  37
  38                http://www.hospiceacadiana.com/
  39
  40        Manuel
  41 */
  42
  43/*
  44        Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
  45*/
  46
  47
  48#ifdef STATIC
  49#define PREBOOT
  50#else
  51#include <linux/decompress/bunzip2.h>
  52#include <linux/slab.h>
  53#endif /* STATIC */
  54
  55#include <linux/decompress/mm.h>
  56
  57#ifndef INT_MAX
  58#define INT_MAX 0x7fffffff
  59#endif
  60
  61/* Constants for Huffman coding */
  62#define MAX_GROUPS              6
  63#define GROUP_SIZE              50      /* 64 would have been more efficient */
  64#define MAX_HUFCODE_BITS        20      /* Longest Huffman code allowed */
  65#define MAX_SYMBOLS             258     /* 256 literals + RUNA + RUNB */
  66#define SYMBOL_RUNA             0
  67#define SYMBOL_RUNB             1
  68
  69/* Status return values */
  70#define RETVAL_OK                       0
  71#define RETVAL_LAST_BLOCK               (-1)
  72#define RETVAL_NOT_BZIP_DATA            (-2)
  73#define RETVAL_UNEXPECTED_INPUT_EOF     (-3)
  74#define RETVAL_UNEXPECTED_OUTPUT_EOF    (-4)
  75#define RETVAL_DATA_ERROR               (-5)
  76#define RETVAL_OUT_OF_MEMORY            (-6)
  77#define RETVAL_OBSOLETE_INPUT           (-7)
  78
  79/* Other housekeeping constants */
  80#define BZIP2_IOBUF_SIZE                4096
  81
  82/* This is what we know about each Huffman coding group */
  83struct group_data {
  84        /* We have an extra slot at the end of limit[] for a sentinal value. */
  85        int limit[MAX_HUFCODE_BITS+1];
  86        int base[MAX_HUFCODE_BITS];
  87        int permute[MAX_SYMBOLS];
  88        int minLen, maxLen;
  89};
  90
  91/* Structure holding all the housekeeping data, including IO buffers and
  92   memory that persists between calls to bunzip */
  93struct bunzip_data {
  94        /* State for interrupting output loop */
  95        int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
  96        /* I/O tracking data (file handles, buffers, positions, etc.) */
  97        int (*fill)(void*, unsigned int);
  98        int inbufCount, inbufPos /*, outbufPos*/;
  99        unsigned char *inbuf /*,*outbuf*/;
 100        unsigned int inbufBitCount, inbufBits;
 101        /* The CRC values stored in the block header and calculated from the
 102        data */
 103        unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
 104        /* Intermediate buffer and its size (in bytes) */
 105        unsigned int *dbuf, dbufSize;
 106        /* These things are a bit too big to go on the stack */
 107        unsigned char selectors[32768];         /* nSelectors = 15 bits */
 108        struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */
 109        int io_error;                   /* non-zero if we have IO error */
 110};
 111
 112
 113/* Return the next nnn bits of input.  All reads from the compressed input
 114   are done through this function.  All reads are big endian */
 115static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
 116{
 117        unsigned int bits = 0;
 118
 119        /* If we need to get more data from the byte buffer, do so.
 120           (Loop getting one byte at a time to enforce endianness and avoid
 121           unaligned access.) */
 122        while (bd->inbufBitCount < bits_wanted) {
 123                /* If we need to read more data from file into byte buffer, do
 124                   so */
 125                if (bd->inbufPos == bd->inbufCount) {
 126                        if (bd->io_error)
 127                                return 0;
 128                        bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
 129                        if (bd->inbufCount <= 0) {
 130                                bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
 131                                return 0;
 132                        }
 133                        bd->inbufPos = 0;
 134                }
 135                /* Avoid 32-bit overflow (dump bit buffer to top of output) */
 136                if (bd->inbufBitCount >= 24) {
 137                        bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
 138                        bits_wanted -= bd->inbufBitCount;
 139                        bits <<= bits_wanted;
 140                        bd->inbufBitCount = 0;
 141                }
 142                /* Grab next 8 bits of input from buffer. */
 143                bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
 144                bd->inbufBitCount += 8;
 145        }
 146        /* Calculate result */
 147        bd->inbufBitCount -= bits_wanted;
 148        bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
 149
 150        return bits;
 151}
 152
 153/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
 154
 155static int INIT get_next_block(struct bunzip_data *bd)
 156{
 157        struct group_data *hufGroup = NULL;
 158        int *base = NULL;
 159        int *limit = NULL;
 160        int dbufCount, nextSym, dbufSize, groupCount, selector,
 161                i, j, k, t, runPos, symCount, symTotal, nSelectors,
 162                byteCount[256];
 163        unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
 164        unsigned int *dbuf, origPtr;
 165
 166        dbuf = bd->dbuf;
 167        dbufSize = bd->dbufSize;
 168        selectors = bd->selectors;
 169
 170        /* Read in header signature and CRC, then validate signature.
 171           (last block signature means CRC is for whole file, return now) */
 172        i = get_bits(bd, 24);
 173        j = get_bits(bd, 24);
 174        bd->headerCRC = get_bits(bd, 32);
 175        if ((i == 0x177245) && (j == 0x385090))
 176                return RETVAL_LAST_BLOCK;
 177        if ((i != 0x314159) || (j != 0x265359))
 178                return RETVAL_NOT_BZIP_DATA;
 179        /* We can add support for blockRandomised if anybody complains.
 180           There was some code for this in busybox 1.0.0-pre3, but nobody ever
 181           noticed that it didn't actually work. */
 182        if (get_bits(bd, 1))
 183                return RETVAL_OBSOLETE_INPUT;
 184        origPtr = get_bits(bd, 24);
 185        if (origPtr > dbufSize)
 186                return RETVAL_DATA_ERROR;
 187        /* mapping table: if some byte values are never used (encoding things
 188           like ascii text), the compression code removes the gaps to have fewer
 189           symbols to deal with, and writes a sparse bitfield indicating which
 190           values were present.  We make a translation table to convert the
 191           symbols back to the corresponding bytes. */
 192        t = get_bits(bd, 16);
 193        symTotal = 0;
 194        for (i = 0; i < 16; i++) {
 195                if (t&(1 << (15-i))) {
 196                        k = get_bits(bd, 16);
 197                        for (j = 0; j < 16; j++)
 198                                if (k&(1 << (15-j)))
 199                                        symToByte[symTotal++] = (16*i)+j;
 200                }
 201        }
 202        /* How many different Huffman coding groups does this block use? */
 203        groupCount = get_bits(bd, 3);
 204        if (groupCount < 2 || groupCount > MAX_GROUPS)
 205                return RETVAL_DATA_ERROR;
 206        /* nSelectors: Every GROUP_SIZE many symbols we select a new
 207           Huffman coding group.  Read in the group selector list,
 208           which is stored as MTF encoded bit runs.  (MTF = Move To
 209           Front, as each value is used it's moved to the start of the
 210           list.) */
 211        nSelectors = get_bits(bd, 15);
 212        if (!nSelectors)
 213                return RETVAL_DATA_ERROR;
 214        for (i = 0; i < groupCount; i++)
 215                mtfSymbol[i] = i;
 216        for (i = 0; i < nSelectors; i++) {
 217                /* Get next value */
 218                for (j = 0; get_bits(bd, 1); j++)
 219                        if (j >= groupCount)
 220                                return RETVAL_DATA_ERROR;
 221                /* Decode MTF to get the next selector */
 222                uc = mtfSymbol[j];
 223                for (; j; j--)
 224                        mtfSymbol[j] = mtfSymbol[j-1];
 225                mtfSymbol[0] = selectors[i] = uc;
 226        }
 227        /* Read the Huffman coding tables for each group, which code
 228           for symTotal literal symbols, plus two run symbols (RUNA,
 229           RUNB) */
 230        symCount = symTotal+2;
 231        for (j = 0; j < groupCount; j++) {
 232                unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
 233                int     minLen, maxLen, pp;
 234                /* Read Huffman code lengths for each symbol.  They're
 235                   stored in a way similar to mtf; record a starting
 236                   value for the first symbol, and an offset from the
 237                   previous value for everys symbol after that.
 238                   (Subtracting 1 before the loop and then adding it
 239                   back at the end is an optimization that makes the
 240                   test inside the loop simpler: symbol length 0
 241                   becomes negative, so an unsigned inequality catches
 242                   it.) */
 243                t = get_bits(bd, 5)-1;
 244                for (i = 0; i < symCount; i++) {
 245                        for (;;) {
 246                                if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
 247                                        return RETVAL_DATA_ERROR;
 248
 249                                /* If first bit is 0, stop.  Else
 250                                   second bit indicates whether to
 251                                   increment or decrement the value.
 252                                   Optimization: grab 2 bits and unget
 253                                   the second if the first was 0. */
 254
 255                                k = get_bits(bd, 2);
 256                                if (k < 2) {
 257                                        bd->inbufBitCount++;
 258                                        break;
 259                                }
 260                                /* Add one if second bit 1, else
 261                                 * subtract 1.  Avoids if/else */
 262                                t += (((k+1)&2)-1);
 263                        }
 264                        /* Correct for the initial -1, to get the
 265                         * final symbol length */
 266                        length[i] = t+1;
 267                }
 268                /* Find largest and smallest lengths in this group */
 269                minLen = maxLen = length[0];
 270
 271                for (i = 1; i < symCount; i++) {
 272                        if (length[i] > maxLen)
 273                                maxLen = length[i];
 274                        else if (length[i] < minLen)
 275                                minLen = length[i];
 276                }
 277
 278                /* Calculate permute[], base[], and limit[] tables from
 279                 * length[].
 280                 *
 281                 * permute[] is the lookup table for converting
 282                 * Huffman coded symbols into decoded symbols.  base[]
 283                 * is the amount to subtract from the value of a
 284                 * Huffman symbol of a given length when using
 285                 * permute[].
 286                 *
 287                 * limit[] indicates the largest numerical value a
 288                 * symbol with a given number of bits can have.  This
 289                 * is how the Huffman codes can vary in length: each
 290                 * code with a value > limit[length] needs another
 291                 * bit.
 292                 */
 293                hufGroup = bd->groups+j;
 294                hufGroup->minLen = minLen;
 295                hufGroup->maxLen = maxLen;
 296                /* Note that minLen can't be smaller than 1, so we
 297                   adjust the base and limit array pointers so we're
 298                   not always wasting the first entry.  We do this
 299                   again when using them (during symbol decoding).*/
 300                base = hufGroup->base-1;
 301                limit = hufGroup->limit-1;
 302                /* Calculate permute[].  Concurrently, initialize
 303                 * temp[] and limit[]. */
 304                pp = 0;
 305                for (i = minLen; i <= maxLen; i++) {
 306                        temp[i] = limit[i] = 0;
 307                        for (t = 0; t < symCount; t++)
 308                                if (length[t] == i)
 309                                        hufGroup->permute[pp++] = t;
 310                }
 311                /* Count symbols coded for at each bit length */
 312                for (i = 0; i < symCount; i++)
 313                        temp[length[i]]++;
 314                /* Calculate limit[] (the largest symbol-coding value
 315                 *at each bit length, which is (previous limit <<
 316                 *1)+symbols at this level), and base[] (number of
 317                 *symbols to ignore at each bit length, which is limit
 318                 *minus the cumulative count of symbols coded for
 319                 *already). */
 320                pp = t = 0;
 321                for (i = minLen; i < maxLen; i++) {
 322                        pp += temp[i];
 323                        /* We read the largest possible symbol size
 324                           and then unget bits after determining how
 325                           many we need, and those extra bits could be
 326                           set to anything.  (They're noise from
 327                           future symbols.)  At each level we're
 328                           really only interested in the first few
 329                           bits, so here we set all the trailing
 330                           to-be-ignored bits to 1 so they don't
 331                           affect the value > limit[length]
 332                           comparison. */
 333                        limit[i] = (pp << (maxLen - i)) - 1;
 334                        pp <<= 1;
 335                        base[i+1] = pp-(t += temp[i]);
 336                }
 337                limit[maxLen+1] = INT_MAX; /* Sentinal value for
 338                                            * reading next sym. */
 339                limit[maxLen] = pp+temp[maxLen]-1;
 340                base[minLen] = 0;
 341        }
 342        /* We've finished reading and digesting the block header.  Now
 343           read this block's Huffman coded symbols from the file and
 344           undo the Huffman coding and run length encoding, saving the
 345           result into dbuf[dbufCount++] = uc */
 346
 347        /* Initialize symbol occurrence counters and symbol Move To
 348         * Front table */
 349        for (i = 0; i < 256; i++) {
 350                byteCount[i] = 0;
 351                mtfSymbol[i] = (unsigned char)i;
 352        }
 353        /* Loop through compressed symbols. */
 354        runPos = dbufCount = symCount = selector = 0;
 355        for (;;) {
 356                /* Determine which Huffman coding group to use. */
 357                if (!(symCount--)) {
 358                        symCount = GROUP_SIZE-1;
 359                        if (selector >= nSelectors)
 360                                return RETVAL_DATA_ERROR;
 361                        hufGroup = bd->groups+selectors[selector++];
 362                        base = hufGroup->base-1;
 363                        limit = hufGroup->limit-1;
 364                }
 365                /* Read next Huffman-coded symbol. */
 366                /* Note: It is far cheaper to read maxLen bits and
 367                   back up than it is to read minLen bits and then an
 368                   additional bit at a time, testing as we go.
 369                   Because there is a trailing last block (with file
 370                   CRC), there is no danger of the overread causing an
 371                   unexpected EOF for a valid compressed file.  As a
 372                   further optimization, we do the read inline
 373                   (falling back to a call to get_bits if the buffer
 374                   runs dry).  The following (up to got_huff_bits:) is
 375                   equivalent to j = get_bits(bd, hufGroup->maxLen);
 376                 */
 377                while (bd->inbufBitCount < hufGroup->maxLen) {
 378                        if (bd->inbufPos == bd->inbufCount) {
 379                                j = get_bits(bd, hufGroup->maxLen);
 380                                goto got_huff_bits;
 381                        }
 382                        bd->inbufBits =
 383                                (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
 384                        bd->inbufBitCount += 8;
 385                };
 386                bd->inbufBitCount -= hufGroup->maxLen;
 387                j = (bd->inbufBits >> bd->inbufBitCount)&
 388                        ((1 << hufGroup->maxLen)-1);
 389got_huff_bits:
 390                /* Figure how how many bits are in next symbol and
 391                 * unget extras */
 392                i = hufGroup->minLen;
 393                while (j > limit[i])
 394                        ++i;
 395                bd->inbufBitCount += (hufGroup->maxLen - i);
 396                /* Huffman decode value to get nextSym (with bounds checking) */
 397                if ((i > hufGroup->maxLen)
 398                        || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
 399                                >= MAX_SYMBOLS))
 400                        return RETVAL_DATA_ERROR;
 401                nextSym = hufGroup->permute[j];
 402                /* We have now decoded the symbol, which indicates
 403                   either a new literal byte, or a repeated run of the
 404                   most recent literal byte.  First, check if nextSym
 405                   indicates a repeated run, and if so loop collecting
 406                   how many times to repeat the last literal. */
 407                if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
 408                        /* If this is the start of a new run, zero out
 409                         * counter */
 410                        if (!runPos) {
 411                                runPos = 1;
 412                                t = 0;
 413                        }
 414                        /* Neat trick that saves 1 symbol: instead of
 415                           or-ing 0 or 1 at each bit position, add 1
 416                           or 2 instead.  For example, 1011 is 1 << 0
 417                           + 1 << 1 + 2 << 2.  1010 is 2 << 0 + 2 << 1
 418                           + 1 << 2.  You can make any bit pattern
 419                           that way using 1 less symbol than the basic
 420                           or 0/1 method (except all bits 0, which
 421                           would use no symbols, but a run of length 0
 422                           doesn't mean anything in this context).
 423                           Thus space is saved. */
 424                        t += (runPos << nextSym);
 425                        /* +runPos if RUNA; +2*runPos if RUNB */
 426
 427                        runPos <<= 1;
 428                        continue;
 429                }
 430                /* When we hit the first non-run symbol after a run,
 431                   we now know how many times to repeat the last
 432                   literal, so append that many copies to our buffer
 433                   of decoded symbols (dbuf) now.  (The last literal
 434                   used is the one at the head of the mtfSymbol
 435                   array.) */
 436                if (runPos) {
 437                        runPos = 0;
 438                        if (dbufCount+t >= dbufSize)
 439                                return RETVAL_DATA_ERROR;
 440
 441                        uc = symToByte[mtfSymbol[0]];
 442                        byteCount[uc] += t;
 443                        while (t--)
 444                                dbuf[dbufCount++] = uc;
 445                }
 446                /* Is this the terminating symbol? */
 447                if (nextSym > symTotal)
 448                        break;
 449                /* At this point, nextSym indicates a new literal
 450                   character.  Subtract one to get the position in the
 451                   MTF array at which this literal is currently to be
 452                   found.  (Note that the result can't be -1 or 0,
 453                   because 0 and 1 are RUNA and RUNB.  But another
 454                   instance of the first symbol in the mtf array,
 455                   position 0, would have been handled as part of a
 456                   run above.  Therefore 1 unused mtf position minus 2
 457                   non-literal nextSym values equals -1.) */
 458                if (dbufCount >= dbufSize)
 459                        return RETVAL_DATA_ERROR;
 460                i = nextSym - 1;
 461                uc = mtfSymbol[i];
 462                /* Adjust the MTF array.  Since we typically expect to
 463                 *move only a small number of symbols, and are bound
 464                 *by 256 in any case, using memmove here would
 465                 *typically be bigger and slower due to function call
 466                 *overhead and other assorted setup costs. */
 467                do {
 468                        mtfSymbol[i] = mtfSymbol[i-1];
 469                } while (--i);
 470                mtfSymbol[0] = uc;
 471                uc = symToByte[uc];
 472                /* We have our literal byte.  Save it into dbuf. */
 473                byteCount[uc]++;
 474                dbuf[dbufCount++] = (unsigned int)uc;
 475        }
 476        /* At this point, we've read all the Huffman-coded symbols
 477           (and repeated runs) for this block from the input stream,
 478           and decoded them into the intermediate buffer.  There are
 479           dbufCount many decoded bytes in dbuf[].  Now undo the
 480           Burrows-Wheeler transform on dbuf.  See
 481           http://dogma.net/markn/articles/bwt/bwt.htm
 482         */
 483        /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
 484        j = 0;
 485        for (i = 0; i < 256; i++) {
 486                k = j+byteCount[i];
 487                byteCount[i] = j;
 488                j = k;
 489        }
 490        /* Figure out what order dbuf would be in if we sorted it. */
 491        for (i = 0; i < dbufCount; i++) {
 492                uc = (unsigned char)(dbuf[i] & 0xff);
 493                dbuf[byteCount[uc]] |= (i << 8);
 494                byteCount[uc]++;
 495        }
 496        /* Decode first byte by hand to initialize "previous" byte.
 497           Note that it doesn't get output, and if the first three
 498           characters are identical it doesn't qualify as a run (hence
 499           writeRunCountdown = 5). */
 500        if (dbufCount) {
 501                if (origPtr >= dbufCount)
 502                        return RETVAL_DATA_ERROR;
 503                bd->writePos = dbuf[origPtr];
 504                bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
 505                bd->writePos >>= 8;
 506                bd->writeRunCountdown = 5;
 507        }
 508        bd->writeCount = dbufCount;
 509
 510        return RETVAL_OK;
 511}
 512
 513/* Undo burrows-wheeler transform on intermediate buffer to produce output.
 514   If start_bunzip was initialized with out_fd =-1, then up to len bytes of
 515   data are written to outbuf.  Return value is number of bytes written or
 516   error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
 517   are ignored, data is written to out_fd and return is RETVAL_OK or error.
 518*/
 519
 520static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
 521{
 522        const unsigned int *dbuf;
 523        int pos, xcurrent, previous, gotcount;
 524
 525        /* If last read was short due to end of file, return last block now */
 526        if (bd->writeCount < 0)
 527                return bd->writeCount;
 528
 529        gotcount = 0;
 530        dbuf = bd->dbuf;
 531        pos = bd->writePos;
 532        xcurrent = bd->writeCurrent;
 533
 534        /* We will always have pending decoded data to write into the output
 535           buffer unless this is the very first call (in which case we haven't
 536           Huffman-decoded a block into the intermediate buffer yet). */
 537
 538        if (bd->writeCopies) {
 539                /* Inside the loop, writeCopies means extra copies (beyond 1) */
 540                --bd->writeCopies;
 541                /* Loop outputting bytes */
 542                for (;;) {
 543                        /* If the output buffer is full, snapshot
 544                         * state and return */
 545                        if (gotcount >= len) {
 546                                bd->writePos = pos;
 547                                bd->writeCurrent = xcurrent;
 548                                bd->writeCopies++;
 549                                return len;
 550                        }
 551                        /* Write next byte into output buffer, updating CRC */
 552                        outbuf[gotcount++] = xcurrent;
 553                        bd->writeCRC = (((bd->writeCRC) << 8)
 554                                ^bd->crc32Table[((bd->writeCRC) >> 24)
 555                                ^xcurrent]);
 556                        /* Loop now if we're outputting multiple
 557                         * copies of this byte */
 558                        if (bd->writeCopies) {
 559                                --bd->writeCopies;
 560                                continue;
 561                        }
 562decode_next_byte:
 563                        if (!bd->writeCount--)
 564                                break;
 565                        /* Follow sequence vector to undo
 566                         * Burrows-Wheeler transform */
 567                        previous = xcurrent;
 568                        pos = dbuf[pos];
 569                        xcurrent = pos&0xff;
 570                        pos >>= 8;
 571                        /* After 3 consecutive copies of the same
 572                           byte, the 4th is a repeat count.  We count
 573                           down from 4 instead *of counting up because
 574                           testing for non-zero is faster */
 575                        if (--bd->writeRunCountdown) {
 576                                if (xcurrent != previous)
 577                                        bd->writeRunCountdown = 4;
 578                        } else {
 579                                /* We have a repeated run, this byte
 580                                 * indicates the count */
 581                                bd->writeCopies = xcurrent;
 582                                xcurrent = previous;
 583                                bd->writeRunCountdown = 5;
 584                                /* Sometimes there are just 3 bytes
 585                                 * (run length 0) */
 586                                if (!bd->writeCopies)
 587                                        goto decode_next_byte;
 588                                /* Subtract the 1 copy we'd output
 589                                 * anyway to get extras */
 590                                --bd->writeCopies;
 591                        }
 592                }
 593                /* Decompression of this block completed successfully */
 594                bd->writeCRC = ~bd->writeCRC;
 595                bd->totalCRC = ((bd->totalCRC << 1) |
 596                                (bd->totalCRC >> 31)) ^ bd->writeCRC;
 597                /* If this block had a CRC error, force file level CRC error. */
 598                if (bd->writeCRC != bd->headerCRC) {
 599                        bd->totalCRC = bd->headerCRC+1;
 600                        return RETVAL_LAST_BLOCK;
 601                }
 602        }
 603
 604        /* Refill the intermediate buffer by Huffman-decoding next
 605         * block of input */
 606        /* (previous is just a convenient unused temp variable here) */
 607        previous = get_next_block(bd);
 608        if (previous) {
 609                bd->writeCount = previous;
 610                return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
 611        }
 612        bd->writeCRC = 0xffffffffUL;
 613        pos = bd->writePos;
 614        xcurrent = bd->writeCurrent;
 615        goto decode_next_byte;
 616}
 617
 618static int INIT nofill(void *buf, unsigned int len)
 619{
 620        return -1;
 621}
 622
 623/* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
 624   a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
 625   ignored, and data is read from file handle into temporary buffer. */
 626static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 627                             int (*fill)(void*, unsigned int))
 628{
 629        struct bunzip_data *bd;
 630        unsigned int i, j, c;
 631        const unsigned int BZh0 =
 632                (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
 633                +(((unsigned int)'h') << 8)+(unsigned int)'0';
 634
 635        /* Figure out how much data to allocate */
 636        i = sizeof(struct bunzip_data);
 637
 638        /* Allocate bunzip_data.  Most fields initialize to zero. */
 639        bd = *bdp = malloc(i);
 640        if (!bd)
 641                return RETVAL_OUT_OF_MEMORY;
 642        memset(bd, 0, sizeof(struct bunzip_data));
 643        /* Setup input buffer */
 644        bd->inbuf = inbuf;
 645        bd->inbufCount = len;
 646        if (fill != NULL)
 647                bd->fill = fill;
 648        else
 649                bd->fill = nofill;
 650
 651        /* Init the CRC32 table (big endian) */
 652        for (i = 0; i < 256; i++) {
 653                c = i << 24;
 654                for (j = 8; j; j--)
 655                        c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
 656                bd->crc32Table[i] = c;
 657        }
 658
 659        /* Ensure that file starts with "BZh['1'-'9']." */
 660        i = get_bits(bd, 32);
 661        if (((unsigned int)(i-BZh0-1)) >= 9)
 662                return RETVAL_NOT_BZIP_DATA;
 663
 664        /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
 665           uncompressed data.  Allocate intermediate buffer for block. */
 666        bd->dbufSize = 100000*(i-BZh0);
 667
 668        bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
 669        if (!bd->dbuf)
 670                return RETVAL_OUT_OF_MEMORY;
 671        return RETVAL_OK;
 672}
 673
 674/* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
 675   not end of file.) */
 676STATIC int INIT bunzip2(unsigned char *buf, int len,
 677                        int(*fill)(void*, unsigned int),
 678                        int(*flush)(void*, unsigned int),
 679                        unsigned char *outbuf,
 680                        int *pos,
 681                        void(*error_fn)(char *x))
 682{
 683        struct bunzip_data *bd;
 684        int i = -1;
 685        unsigned char *inbuf;
 686
 687        set_error_fn(error_fn);
 688        if (flush)
 689                outbuf = malloc(BZIP2_IOBUF_SIZE);
 690
 691        if (!outbuf) {
 692                error("Could not allocate output bufer");
 693                return RETVAL_OUT_OF_MEMORY;
 694        }
 695        if (buf)
 696                inbuf = buf;
 697        else
 698                inbuf = malloc(BZIP2_IOBUF_SIZE);
 699        if (!inbuf) {
 700                error("Could not allocate input bufer");
 701                i = RETVAL_OUT_OF_MEMORY;
 702                goto exit_0;
 703        }
 704        i = start_bunzip(&bd, inbuf, len, fill);
 705        if (!i) {
 706                for (;;) {
 707                        i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
 708                        if (i <= 0)
 709                                break;
 710                        if (!flush)
 711                                outbuf += i;
 712                        else
 713                                if (i != flush(outbuf, i)) {
 714                                        i = RETVAL_UNEXPECTED_OUTPUT_EOF;
 715                                        break;
 716                                }
 717                }
 718        }
 719        /* Check CRC and release memory */
 720        if (i == RETVAL_LAST_BLOCK) {
 721                if (bd->headerCRC != bd->totalCRC)
 722                        error("Data integrity error when decompressing.");
 723                else
 724                        i = RETVAL_OK;
 725        } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
 726                error("Compressed file ends unexpectedly");
 727        }
 728        if (!bd)
 729                goto exit_1;
 730        if (bd->dbuf)
 731                large_free(bd->dbuf);
 732        if (pos)
 733                *pos = bd->inbufPos;
 734        free(bd);
 735exit_1:
 736        if (!buf)
 737                free(inbuf);
 738exit_0:
 739        if (flush)
 740                free(outbuf);
 741        return i;
 742}
 743
 744#ifdef PREBOOT
 745STATIC int INIT decompress(unsigned char *buf, int len,
 746                        int(*fill)(void*, unsigned int),
 747                        int(*flush)(void*, unsigned int),
 748                        unsigned char *outbuf,
 749                        int *pos,
 750                        void(*error_fn)(char *x))
 751{
 752        return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
 753}
 754#endif
 755
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.