001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029import java.nio.ByteOrder;
030import java.util.Arrays;
031
032import org.apache.commons.compress.compressors.CompressorInputStream;
033import org.apache.commons.compress.utils.BitInputStream;
034import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
035import org.apache.commons.compress.utils.InputStreamStatistics;
036
037/**
038 * An input stream that decompresses from the BZip2 format to be read as any other stream.
039 *
040 * @NotThreadSafe
041 */
042public class BZip2CompressorInputStream extends CompressorInputStream
043    implements BZip2Constants, InputStreamStatistics {
044
045    /**
046     * Index of the last char in the block, so the block size == last + 1.
047     */
048    private int last;
049
050    /**
051     * Index in zptr[] of original string after sorting.
052     */
053    private int origPtr;
054
055    /**
056     * always: in the range 0 .. 9. The current block size is 100000 * this
057     * number.
058     */
059    private int blockSize100k;
060
061    private boolean blockRandomised;
062
063    private final CRC crc = new CRC();
064
065    private int nInUse;
066
067    private BitInputStream bin;
068    private final boolean decompressConcatenated;
069
070    private static final int EOF = 0;
071    private static final int START_BLOCK_STATE = 1;
072    private static final int RAND_PART_A_STATE = 2;
073    private static final int RAND_PART_B_STATE = 3;
074    private static final int RAND_PART_C_STATE = 4;
075    private static final int NO_RAND_PART_A_STATE = 5;
076    private static final int NO_RAND_PART_B_STATE = 6;
077    private static final int NO_RAND_PART_C_STATE = 7;
078
079    private int currentState = START_BLOCK_STATE;
080
081    private int storedBlockCRC, storedCombinedCRC;
082    private int computedBlockCRC, computedCombinedCRC;
083
084    // Variables used by setup* methods exclusively
085
086    private int su_count;
087    private int su_ch2;
088    private int su_chPrev;
089    private int su_i2;
090    private int su_j2;
091    private int su_rNToGo;
092    private int su_rTPos;
093    private int su_tPos;
094    private char su_z;
095
096    /**
097     * All memory intensive stuff. This field is initialized by initBlock().
098     */
099    private BZip2CompressorInputStream.Data data;
100
101    /**
102     * Constructs a new BZip2CompressorInputStream which decompresses bytes
103     * read from the specified stream. This doesn't support decompressing
104     * concatenated .bz2 files.
105     *
106     * @param in the InputStream from which this object should be created
107     * @throws IOException
108     *             if the stream content is malformed or an I/O error occurs.
109     * @throws NullPointerException
110     *             if {@code in == null}
111     */
112    public BZip2CompressorInputStream(final InputStream in) throws IOException {
113        this(in, false);
114    }
115
116    /**
117     * Constructs a new BZip2CompressorInputStream which decompresses bytes
118     * read from the specified stream.
119     *
120     * @param in the InputStream from which this object should be created
121     * @param decompressConcatenated
122     *                     if true, decompress until the end of the input;
123     *                     if false, stop after the first .bz2 stream and
124     *                     leave the input position to point to the next
125     *                     byte after the .bz2 stream
126     *
127     * @throws IOException
128     *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
129     */
130    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
131        this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
132            ByteOrder.BIG_ENDIAN);
133        this.decompressConcatenated = decompressConcatenated;
134
135        init(true);
136        initBlock();
137    }
138
139    @Override
140    public int read() throws IOException {
141        if (this.bin != null) {
142            final int r = read0();
143            count(r < 0 ? -1 : 1);
144            return r;
145        }
146        throw new IOException("Stream closed");
147    }
148
149    /*
150     * (non-Javadoc)
151     *
152     * @see java.io.InputStream#read(byte[], int, int)
153     */
154    @Override
155    public int read(final byte[] dest, final int offs, final int len)
156        throws IOException {
157        if (offs < 0) {
158            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
159        }
160        if (len < 0) {
161            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
162        }
163        if (offs + len > dest.length) {
164            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
165                                                + len + ") > dest.length(" + dest.length + ").");
166        }
167        if (this.bin == null) {
168            throw new IOException("Stream closed");
169        }
170        if (len == 0) {
171            return 0;
172        }
173
174        final int hi = offs + len;
175        int destOffs = offs;
176        int b;
177        while (destOffs < hi && ((b = read0()) >= 0)) {
178            dest[destOffs++] = (byte) b;
179            count(1);
180        }
181
182        return (destOffs == offs) ? -1 : (destOffs - offs);
183    }
184
185    /**
186     * @since 1.17
187     */
188    @Override
189    public long getCompressedCount() {
190        return bin.getBytesRead();
191    }
192
193    private void makeMaps() {
194        final boolean[] inUse = this.data.inUse;
195        final byte[] seqToUnseq = this.data.seqToUnseq;
196
197        int nInUseShadow = 0;
198
199        for (int i = 0; i < 256; i++) {
200            if (inUse[i]) {
201                seqToUnseq[nInUseShadow++] = (byte) i;
202            }
203        }
204
205        this.nInUse = nInUseShadow;
206    }
207
208    private int read0() throws IOException {
209        switch (currentState) {
210        case EOF:
211            return -1;
212
213        case START_BLOCK_STATE:
214            return setupBlock();
215
216        case RAND_PART_A_STATE:
217            throw new IllegalStateException();
218
219        case RAND_PART_B_STATE:
220            return setupRandPartB();
221
222        case RAND_PART_C_STATE:
223            return setupRandPartC();
224
225        case NO_RAND_PART_A_STATE:
226            throw new IllegalStateException();
227
228        case NO_RAND_PART_B_STATE:
229            return setupNoRandPartB();
230
231        case NO_RAND_PART_C_STATE:
232            return setupNoRandPartC();
233
234        default:
235            throw new IllegalStateException();
236        }
237    }
238
239    private int readNextByte(final BitInputStream in) throws IOException {
240        final long b = in.readBits(8);
241        return (int) b;
242    }
243
244    private boolean init(final boolean isFirstStream) throws IOException {
245        if (null == bin) {
246            throw new IOException("No InputStream");
247        }
248
249        if (!isFirstStream) {
250            bin.clearBitCache();
251        }
252
253        final int magic0 = readNextByte(this.bin);
254        if (magic0 == -1 && !isFirstStream) {
255            return false;
256        }
257        final int magic1 = readNextByte(this.bin);
258        final int magic2 = readNextByte(this.bin);
259
260        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
261            throw new IOException(isFirstStream
262                    ? "Stream is not in the BZip2 format"
263                    : "Garbage after a valid BZip2 stream");
264        }
265
266        final int blockSize = readNextByte(this.bin);
267        if ((blockSize < '1') || (blockSize > '9')) {
268            throw new IOException("BZip2 block size is invalid");
269        }
270
271        this.blockSize100k = blockSize - '0';
272
273        this.computedCombinedCRC = 0;
274
275        return true;
276    }
277
278    private void initBlock() throws IOException {
279        final BitInputStream bin = this.bin;
280        char magic0;
281        char magic1;
282        char magic2;
283        char magic3;
284        char magic4;
285        char magic5;
286
287        while (true) {
288            // Get the block magic bytes.
289            magic0 = bsGetUByte(bin);
290            magic1 = bsGetUByte(bin);
291            magic2 = bsGetUByte(bin);
292            magic3 = bsGetUByte(bin);
293            magic4 = bsGetUByte(bin);
294            magic5 = bsGetUByte(bin);
295
296            // If isn't end of stream magic, break out of the loop.
297            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
298                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
299                break;
300            }
301
302            // End of stream was reached. Check the combined CRC and
303            // advance to the next .bz2 stream if decoding concatenated
304            // streams.
305            if (complete()) {
306                return;
307            }
308        }
309
310        if (magic0 != 0x31 || // '1'
311            magic1 != 0x41 || // ')'
312            magic2 != 0x59 || // 'Y'
313            magic3 != 0x26 || // '&'
314            magic4 != 0x53 || // 'S'
315            magic5 != 0x59 // 'Y'
316            ) {
317            this.currentState = EOF;
318            throw new IOException("Bad block header");
319        }
320        this.storedBlockCRC = bsGetInt(bin);
321        this.blockRandomised = bsR(bin, 1) == 1;
322
323        /*
324         * Allocate data here instead in constructor, so we do not allocate
325         * it if the input file is empty.
326         */
327        if (this.data == null) {
328            this.data = new Data(this.blockSize100k);
329        }
330
331        // currBlockNo++;
332        getAndMoveToFrontDecode();
333
334        this.crc.initializeCRC();
335        this.currentState = START_BLOCK_STATE;
336    }
337
338    private void endBlock() throws IOException {
339        this.computedBlockCRC = this.crc.getFinalCRC();
340
341        // A bad CRC is considered a fatal error.
342        if (this.storedBlockCRC != this.computedBlockCRC) {
343            // make next blocks readable without error
344            // (repair feature, not yet documented, not tested)
345            this.computedCombinedCRC = (this.storedCombinedCRC << 1)
346                | (this.storedCombinedCRC >>> 31);
347            this.computedCombinedCRC ^= this.storedBlockCRC;
348
349            throw new IOException("BZip2 CRC error");
350        }
351
352        this.computedCombinedCRC = (this.computedCombinedCRC << 1)
353            | (this.computedCombinedCRC >>> 31);
354        this.computedCombinedCRC ^= this.computedBlockCRC;
355    }
356
357    private boolean complete() throws IOException {
358        this.storedCombinedCRC = bsGetInt(bin);
359        this.currentState = EOF;
360        this.data = null;
361
362        if (this.storedCombinedCRC != this.computedCombinedCRC) {
363            throw new IOException("BZip2 CRC error");
364        }
365
366        // Look for the next .bz2 stream if decompressing
367        // concatenated files.
368        return !decompressConcatenated || !init(false);
369    }
370
371    @Override
372    public void close() throws IOException {
373        final BitInputStream inShadow = this.bin;
374        if (inShadow != null) {
375            try {
376                inShadow.close();
377            } finally {
378                this.data = null;
379                this.bin = null;
380            }
381        }
382    }
383
384    /**
385     * read bits from the input stream
386     * @param n the number of bits to read, must not exceed 32?
387     * @return the requested bits combined into an int
388     * @throws IOException
389     */
390    private static int bsR(final BitInputStream bin, final int n) throws IOException {
391        final long thech = bin.readBits(n);
392        if (thech < 0) {
393            throw new IOException("Unexpected end of stream");
394        }
395        return (int) thech;
396    }
397
398    private static boolean bsGetBit(final BitInputStream bin) throws IOException {
399        return bsR(bin, 1) != 0;
400    }
401
402    private static char bsGetUByte(final BitInputStream bin) throws IOException {
403        return (char) bsR(bin, 8);
404    }
405
406    private static int bsGetInt(final BitInputStream bin) throws IOException {
407        return bsR(bin, 32);
408    }
409
410    private static void checkBounds(final int checkVal, final int limitExclusive, final String name)
411        throws IOException {
412        if (checkVal < 0) {
413            throw new IOException("Corrupted input, " + name + " value negative");
414        }
415        if (checkVal >= limitExclusive) {
416            throw new IOException("Corrupted input, " + name + " value too big");
417        }
418    }
419
420    /**
421     * Called by createHuffmanDecodingTables() exclusively.
422     */
423    private static void hbCreateDecodeTables(final int[] limit,
424                                             final int[] base, final int[] perm, final char[] length,
425                                             final int minLen, final int maxLen, final int alphaSize)
426        throws IOException {
427        for (int i = minLen, pp = 0; i <= maxLen; i++) {
428            for (int j = 0; j < alphaSize; j++) {
429                if (length[j] == i) {
430                    perm[pp++] = j;
431                }
432            }
433        }
434
435        for (int i = MAX_CODE_LEN; --i > 0;) {
436            base[i] = 0;
437            limit[i] = 0;
438        }
439
440        for (int i = 0; i < alphaSize; i++) {
441            final int l = length[i];
442            checkBounds(l, MAX_ALPHA_SIZE, "length");
443            base[l + 1]++;
444        }
445
446        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
447            b += base[i];
448            base[i] = b;
449        }
450
451        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
452            final int nb = base[i + 1];
453            vec += nb - b;
454            b = nb;
455            limit[i] = vec - 1;
456            vec <<= 1;
457        }
458
459        for (int i = minLen + 1; i <= maxLen; i++) {
460            base[i] = ((limit[i - 1] + 1) << 1) - base[i];
461        }
462    }
463
464    private void recvDecodingTables() throws IOException {
465        final BitInputStream bin = this.bin;
466        final Data dataShadow = this.data;
467        final boolean[] inUse = dataShadow.inUse;
468        final byte[] pos = dataShadow.recvDecodingTables_pos;
469        final byte[] selector = dataShadow.selector;
470        final byte[] selectorMtf = dataShadow.selectorMtf;
471
472        int inUse16 = 0;
473
474        /* Receive the mapping table */
475        for (int i = 0; i < 16; i++) {
476            if (bsGetBit(bin)) {
477                inUse16 |= 1 << i;
478            }
479        }
480
481        Arrays.fill(inUse, false);
482        for (int i = 0; i < 16; i++) {
483            if ((inUse16 & (1 << i)) != 0) {
484                final int i16 = i << 4;
485                for (int j = 0; j < 16; j++) {
486                    if (bsGetBit(bin)) {
487                        inUse[i16 + j] = true;
488                    }
489                }
490            }
491        }
492
493        makeMaps();
494        final int alphaSize = this.nInUse + 2;
495        /* Now the selectors */
496        final int nGroups = bsR(bin, 3);
497        final int selectors = bsR(bin, 15);
498        if (selectors < 0) {
499            throw new IOException("Corrupted input, nSelectors value negative");
500        }
501        checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize");
502        checkBounds(nGroups, N_GROUPS + 1, "nGroups");
503
504        // Don't fail on nSelectors overflowing boundaries but discard the values in overflow
505        // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/
506        // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html
507
508        for (int i = 0; i < selectors; i++) {
509            int j = 0;
510            while (bsGetBit(bin)) {
511                j++;
512            }
513            if (i < MAX_SELECTORS) {
514                selectorMtf[i] = (byte) j;
515            }
516        }
517        final int nSelectors = selectors > MAX_SELECTORS ? MAX_SELECTORS : selectors;
518
519        /* Undo the MTF values for the selectors. */
520        for (int v = nGroups; --v >= 0;) {
521            pos[v] = (byte) v;
522        }
523
524        for (int i = 0; i < nSelectors; i++) {
525            int v = selectorMtf[i] & 0xff;
526            checkBounds(v, N_GROUPS, "selectorMtf");
527            final byte tmp = pos[v];
528            while (v > 0) {
529                // nearly all times v is zero, 4 in most other cases
530                pos[v] = pos[v - 1];
531                v--;
532            }
533            pos[0] = tmp;
534            selector[i] = tmp;
535        }
536
537        final char[][] len = dataShadow.temp_charArray2d;
538
539        /* Now the coding tables */
540        for (int t = 0; t < nGroups; t++) {
541            int curr = bsR(bin, 5);
542            final char[] len_t = len[t];
543            for (int i = 0; i < alphaSize; i++) {
544                while (bsGetBit(bin)) {
545                    curr += bsGetBit(bin) ? -1 : 1;
546                }
547                len_t[i] = (char) curr;
548            }
549        }
550
551        // finally create the Huffman tables
552        createHuffmanDecodingTables(alphaSize, nGroups);
553    }
554
555    /**
556     * Called by recvDecodingTables() exclusively.
557     */
558    private void createHuffmanDecodingTables(final int alphaSize,
559                                             final int nGroups) throws IOException {
560        final Data dataShadow = this.data;
561        final char[][] len = dataShadow.temp_charArray2d;
562        final int[] minLens = dataShadow.minLens;
563        final int[][] limit = dataShadow.limit;
564        final int[][] base = dataShadow.base;
565        final int[][] perm = dataShadow.perm;
566
567        for (int t = 0; t < nGroups; t++) {
568            int minLen = 32;
569            int maxLen = 0;
570            final char[] len_t = len[t];
571            for (int i = alphaSize; --i >= 0;) {
572                final char lent = len_t[i];
573                if (lent > maxLen) {
574                    maxLen = lent;
575                }
576                if (lent < minLen) {
577                    minLen = lent;
578                }
579            }
580            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
581                                 maxLen, alphaSize);
582            minLens[t] = minLen;
583        }
584    }
585
586    private void getAndMoveToFrontDecode() throws IOException {
587        final BitInputStream bin = this.bin;
588        this.origPtr = bsR(bin, 24);
589        recvDecodingTables();
590
591        final Data dataShadow = this.data;
592        final byte[] ll8 = dataShadow.ll8;
593        final int[] unzftab = dataShadow.unzftab;
594        final byte[] selector = dataShadow.selector;
595        final byte[] seqToUnseq = dataShadow.seqToUnseq;
596        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
597        final int[] minLens = dataShadow.minLens;
598        final int[][] limit = dataShadow.limit;
599        final int[][] base = dataShadow.base;
600        final int[][] perm = dataShadow.perm;
601        final int limitLast = this.blockSize100k * 100000;
602
603        /*
604         * Setting up the unzftab entries here is not strictly necessary, but it
605         * does save having to do it later in a separate pass, and so saves a
606         * block's worth of cache misses.
607         */
608        for (int i = 256; --i >= 0;) {
609            yy[i] = (char) i;
610            unzftab[i] = 0;
611        }
612
613        int groupNo = 0;
614        int groupPos = G_SIZE - 1;
615        final int eob = this.nInUse + 1;
616        int nextSym = getAndMoveToFrontDecode0();
617        int lastShadow = -1;
618        int zt = selector[groupNo] & 0xff;
619        checkBounds(zt, N_GROUPS, "zt");
620        int[] base_zt = base[zt];
621        int[] limit_zt = limit[zt];
622        int[] perm_zt = perm[zt];
623        int minLens_zt = minLens[zt];
624
625        while (nextSym != eob) {
626            if ((nextSym == RUNA) || (nextSym == RUNB)) {
627                int s = -1;
628
629                for (int n = 1; true; n <<= 1) {
630                    if (nextSym == RUNA) {
631                        s += n;
632                    } else if (nextSym == RUNB) {
633                        s += n << 1;
634                    } else {
635                        break;
636                    }
637
638                    if (groupPos == 0) {
639                        groupPos = G_SIZE - 1;
640                        checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
641                        zt = selector[groupNo] & 0xff;
642                        checkBounds(zt, N_GROUPS, "zt");
643                        base_zt = base[zt];
644                        limit_zt = limit[zt];
645                        perm_zt = perm[zt];
646                        minLens_zt = minLens[zt];
647                    } else {
648                        groupPos--;
649                    }
650
651                    int zn = minLens_zt;
652                    checkBounds(zn, MAX_ALPHA_SIZE, "zn");
653                    int zvec = bsR(bin, zn);
654                    while(zvec > limit_zt[zn]) {
655                        checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
656                        zvec = (zvec << 1) | bsR(bin, 1);
657                    }
658                    final int tmp = zvec - base_zt[zn];
659                    checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
660                    nextSym = perm_zt[tmp];
661                }
662                checkBounds(s, this.data.ll8.length, "s");
663
664                final int yy0 = yy[0];
665                checkBounds(yy0, 256, "yy");
666                final byte ch = seqToUnseq[yy0];
667                unzftab[ch & 0xff] += s + 1;
668
669                final int from = ++lastShadow;
670                lastShadow += s;
671                checkBounds(lastShadow, this.data.ll8.length, "lastShadow");
672                Arrays.fill(ll8, from, lastShadow + 1, ch);
673
674                if (lastShadow >= limitLast) {
675                    throw new IOException("Block overrun while expanding RLE in MTF, "
676                        + lastShadow + " exceeds " + limitLast);
677                }
678            } else {
679                if (++lastShadow >= limitLast) {
680                    throw new IOException("Block overrun in MTF, "
681                        + lastShadow + " exceeds " + limitLast);
682                }
683                checkBounds(nextSym, 256 + 1, "nextSym");
684
685                final char tmp = yy[nextSym - 1];
686                checkBounds(tmp, 256, "yy");
687                unzftab[seqToUnseq[tmp] & 0xff]++;
688                ll8[lastShadow] = seqToUnseq[tmp];
689
690                /*
691                 * This loop is hammered during decompression, hence avoid
692                 * native method call overhead of System.arraycopy for very
693                 * small ranges to copy.
694                 */
695                if (nextSym <= 16) {
696                    for (int j = nextSym - 1; j > 0;) {
697                        yy[j] = yy[--j];
698                    }
699                } else {
700                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
701                }
702
703                yy[0] = tmp;
704
705                if (groupPos == 0) {
706                    groupPos = G_SIZE - 1;
707                    checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
708                    zt = selector[groupNo] & 0xff;
709                    checkBounds(zt, N_GROUPS, "zt");
710                    base_zt = base[zt];
711                    limit_zt = limit[zt];
712                    perm_zt = perm[zt];
713                    minLens_zt = minLens[zt];
714                } else {
715                    groupPos--;
716                }
717
718                int zn = minLens_zt;
719                checkBounds(zn, MAX_ALPHA_SIZE, "zn");
720                int zvec = bsR(bin, zn);
721                while(zvec > limit_zt[zn]) {
722                    checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
723                    zvec = (zvec << 1) | bsR(bin, 1);
724                }
725                final int idx = zvec - base_zt[zn];
726                checkBounds(idx, MAX_ALPHA_SIZE, "zvec");
727                nextSym = perm_zt[idx];
728            }
729        }
730
731        this.last = lastShadow;
732    }
733
734    private int getAndMoveToFrontDecode0() throws IOException {
735        final Data dataShadow = this.data;
736        final int zt = dataShadow.selector[0] & 0xff;
737        checkBounds(zt, N_GROUPS, "zt");
738        final int[] limit_zt = dataShadow.limit[zt];
739        int zn = dataShadow.minLens[zt];
740        checkBounds(zn, MAX_ALPHA_SIZE, "zn");
741        int zvec = bsR(bin, zn);
742        while (zvec > limit_zt[zn]) {
743            checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
744            zvec = (zvec << 1) | bsR(bin, 1);
745        }
746        final int tmp = zvec - dataShadow.base[zt][zn];
747        checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
748
749        return dataShadow.perm[zt][tmp];
750    }
751
752    private int setupBlock() throws IOException {
753        if (currentState == EOF || this.data == null) {
754            return -1;
755        }
756
757        final int[] cftab = this.data.cftab;
758        final int ttLen = this.last + 1;
759        final int[] tt = this.data.initTT(ttLen);
760        final byte[] ll8 = this.data.ll8;
761        cftab[0] = 0;
762        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
763
764        for (int i = 1, c = cftab[0]; i <= 256; i++) {
765            c += cftab[i];
766            cftab[i] = c;
767        }
768
769        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
770            final int tmp = cftab[ll8[i] & 0xff]++;
771            checkBounds(tmp, ttLen, "tt index");
772            tt[tmp] = i;
773        }
774
775        if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
776            throw new IOException("Stream corrupted");
777        }
778
779        this.su_tPos = tt[this.origPtr];
780        this.su_count = 0;
781        this.su_i2 = 0;
782        this.su_ch2 = 256; /* not a char and not EOF */
783
784        if (this.blockRandomised) {
785            this.su_rNToGo = 0;
786            this.su_rTPos = 0;
787            return setupRandPartA();
788        }
789        return setupNoRandPartA();
790    }
791
792    private int setupRandPartA() throws IOException {
793        if (this.su_i2 <= this.last) {
794            this.su_chPrev = this.su_ch2;
795            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
796            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
797            this.su_tPos = this.data.tt[this.su_tPos];
798            if (this.su_rNToGo == 0) {
799                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
800                if (++this.su_rTPos == 512) {
801                    this.su_rTPos = 0;
802                }
803            } else {
804                this.su_rNToGo--;
805            }
806            this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
807            this.su_i2++;
808            this.currentState = RAND_PART_B_STATE;
809            this.crc.updateCRC(su_ch2Shadow);
810            return su_ch2Shadow;
811        }
812        endBlock();
813        initBlock();
814        return setupBlock();
815    }
816
817    private int setupNoRandPartA() throws IOException {
818        if (this.su_i2 <= this.last) {
819            this.su_chPrev = this.su_ch2;
820            final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
821            this.su_ch2 = su_ch2Shadow;
822            checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
823            this.su_tPos = this.data.tt[this.su_tPos];
824            this.su_i2++;
825            this.currentState = NO_RAND_PART_B_STATE;
826            this.crc.updateCRC(su_ch2Shadow);
827            return su_ch2Shadow;
828        }
829        this.currentState = NO_RAND_PART_A_STATE;
830        endBlock();
831        initBlock();
832        return setupBlock();
833    }
834
835    private int setupRandPartB() throws IOException {
836        if (this.su_ch2 != this.su_chPrev) {
837            this.currentState = RAND_PART_A_STATE;
838            this.su_count = 1;
839            return setupRandPartA();
840        }
841        if (++this.su_count < 4) {
842            this.currentState = RAND_PART_A_STATE;
843            return setupRandPartA();
844        }
845        this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
846        checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
847        this.su_tPos = this.data.tt[this.su_tPos];
848        if (this.su_rNToGo == 0) {
849            this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
850            if (++this.su_rTPos == 512) {
851                this.su_rTPos = 0;
852            }
853        } else {
854            this.su_rNToGo--;
855        }
856        this.su_j2 = 0;
857        this.currentState = RAND_PART_C_STATE;
858        if (this.su_rNToGo == 1) {
859            this.su_z ^= 1;
860        }
861        return setupRandPartC();
862    }
863
864    private int setupRandPartC() throws IOException {
865        if (this.su_j2 < this.su_z) {
866            this.crc.updateCRC(this.su_ch2);
867            this.su_j2++;
868            return this.su_ch2;
869        }
870        this.currentState = RAND_PART_A_STATE;
871        this.su_i2++;
872        this.su_count = 0;
873        return setupRandPartA();
874    }
875
876    private int setupNoRandPartB() throws IOException {
877        if (this.su_ch2 != this.su_chPrev) {
878            this.su_count = 1;
879            return setupNoRandPartA();
880        }
881        if (++this.su_count >= 4) {
882            checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos");
883            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
884            this.su_tPos = this.data.tt[this.su_tPos];
885            this.su_j2 = 0;
886            return setupNoRandPartC();
887        }
888        return setupNoRandPartA();
889    }
890
891    private int setupNoRandPartC() throws IOException {
892        if (this.su_j2 < this.su_z) {
893            final int su_ch2Shadow = this.su_ch2;
894            this.crc.updateCRC(su_ch2Shadow);
895            this.su_j2++;
896            this.currentState = NO_RAND_PART_C_STATE;
897            return su_ch2Shadow;
898        }
899        this.su_i2++;
900        this.su_count = 0;
901        return setupNoRandPartA();
902    }
903
904    private static final class Data {
905
906        // (with blockSize 900k)
907        final boolean[] inUse = new boolean[256]; // 256 byte
908
909        final byte[] seqToUnseq = new byte[256]; // 256 byte
910        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
911        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
912
913        /**
914         * Freq table collected to save a pass over the data during
915         * decompression.
916         */
917        final int[] unzftab = new int[256]; // 1024 byte
918
919        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
920        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
921        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
922        final int[] minLens = new int[N_GROUPS]; // 24 byte
923
924        final int[] cftab = new int[257]; // 1028 byte
925        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
926        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
927        // byte
928        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
929        // ---------------
930        // 60798 byte
931
932        int[] tt; // 3600000 byte
933        final byte[] ll8; // 900000 byte
934
935        // ---------------
936        // 4560782 byte
937        // ===============
938
939        Data(final int blockSize100k) {
940            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
941        }
942
943        /**
944         * Initializes the {@link #tt} array.
945         *
946         * This method is called when the required length of the array is known.
947         * I don't initialize it at construction time to avoid unnecessary
948         * memory allocation when compressing small files.
949         */
950        int[] initTT(final int length) {
951            int[] ttShadow = this.tt;
952
953            // tt.length should always be >= length, but theoretically
954            // it can happen, if the compressor mixed small and large
955            // blocks. Normally only the last block will be smaller
956            // than others.
957            if ((ttShadow == null) || (ttShadow.length < length)) {
958                this.tt = ttShadow = new int[length];
959            }
960
961            return ttShadow;
962        }
963
964    }
965
966    /**
967     * Checks if the signature matches what is expected for a bzip2 file.
968     *
969     * @param signature
970     *            the bytes to check
971     * @param length
972     *            the number of bytes to check
973     * @return true, if this stream is a bzip2 compressed stream, false otherwise
974     *
975     * @since 1.1
976     */
977    public static boolean matches(final byte[] signature, final int length) {
978        return length >= 3 && signature[0] == 'B' &&
979                signature[1] == 'Z' && signature[2] == 'h';
980    }
981}