View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  /*
21   * This package is based on the work done by Keiron Liddle, Aftex Software
22   * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
23   * great code.
24   */
25  package org.apache.commons.compress.compressors.bzip2;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.nio.ByteOrder;
30  import java.util.Arrays;
31  
32  import org.apache.commons.compress.compressors.CompressorInputStream;
33  import org.apache.commons.compress.utils.BitInputStream;
34  import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
35  
36  /**
37   * An input stream that decompresses from the BZip2 format to be read as any other stream.
38   * 
39   * @NotThreadSafe
40   */
41  public class BZip2CompressorInputStream extends CompressorInputStream implements
42                                                                            BZip2Constants {
43  
44      /**
45       * Index of the last char in the block, so the block size == last + 1.
46       */
47      private int last;
48  
49      /**
50       * Index in zptr[] of original string after sorting.
51       */
52      private int origPtr;
53  
54      /**
55       * always: in the range 0 .. 9. The current block size is 100000 * this
56       * number.
57       */
58      private int blockSize100k;
59  
60      private boolean blockRandomised;
61  
62      private final CRC crc = new CRC();
63  
64      private int nInUse;
65  
66      private BitInputStream bin;
67      private final boolean decompressConcatenated;
68  
69      private static final int EOF = 0;
70      private static final int START_BLOCK_STATE = 1;
71      private static final int RAND_PART_A_STATE = 2;
72      private static final int RAND_PART_B_STATE = 3;
73      private static final int RAND_PART_C_STATE = 4;
74      private static final int NO_RAND_PART_A_STATE = 5;
75      private static final int NO_RAND_PART_B_STATE = 6;
76      private static final int NO_RAND_PART_C_STATE = 7;
77  
78      private int currentState = START_BLOCK_STATE;
79  
80      private int storedBlockCRC, storedCombinedCRC;
81      private int computedBlockCRC, computedCombinedCRC;
82  
83      // Variables used by setup* methods exclusively
84  
85      private int su_count;
86      private int su_ch2;
87      private int su_chPrev;
88      private int su_i2;
89      private int su_j2;
90      private int su_rNToGo;
91      private int su_rTPos;
92      private int su_tPos;
93      private char su_z;
94  
95      /**
96       * All memory intensive stuff. This field is initialized by initBlock().
97       */
98      private BZip2CompressorInputStream.Data data;
99  
100     /**
101      * Constructs a new BZip2CompressorInputStream which decompresses bytes
102      * read from the specified stream. This doesn't suppprt decompressing
103      * concatenated .bz2 files.
104      * 
105      * @param in the InputStream from which this object should be created
106      * @throws IOException
107      *             if the stream content is malformed or an I/O error occurs.
108      * @throws NullPointerException
109      *             if {@code in == null}
110      */
111     public BZip2CompressorInputStream(final InputStream in) throws IOException {
112         this(in, false);
113     }
114 
115     /**
116      * Constructs a new BZip2CompressorInputStream which decompresses bytes
117      * read from the specified stream.
118      *
119      * @param in the InputStream from which this object should be created
120      * @param decompressConcatenated
121      *                     if true, decompress until the end of the input;
122      *                     if false, stop after the first .bz2 stream and
123      *                     leave the input position to point to the next
124      *                     byte after the .bz2 stream
125      *
126      * @throws IOException
127      *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
128      */
129     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
130         this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
131             ByteOrder.BIG_ENDIAN);
132         this.decompressConcatenated = decompressConcatenated;
133 
134         init(true);
135         initBlock();
136     }
137 
138     @Override
139     public int read() throws IOException {
140         if (this.bin != null) {
141             final int r = read0();
142             count(r < 0 ? -1 : 1);
143             return r;
144         }
145         throw new IOException("stream closed");
146     }
147 
148     /*
149      * (non-Javadoc)
150      * 
151      * @see java.io.InputStream#read(byte[], int, int)
152      */
153     @Override
154     public int read(final byte[] dest, final int offs, final int len)
155         throws IOException {
156         if (offs < 0) {
157             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
158         }
159         if (len < 0) {
160             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
161         }
162         if (offs + len > dest.length) {
163             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
164                                                 + len + ") > dest.length(" + dest.length + ").");
165         }
166         if (this.bin == null) {
167             throw new IOException("stream closed");
168         }
169         if (len == 0) {
170             return 0;
171         }
172 
173         final int hi = offs + len;
174         int destOffs = offs;
175         int b;
176         while (destOffs < hi && ((b = read0()) >= 0)) {
177             dest[destOffs++] = (byte) b;
178             count(1);
179         }
180 
181         final int c = (destOffs == offs) ? -1 : (destOffs - offs);
182         return c;
183     }
184 
185     private void makeMaps() {
186         final boolean[] inUse = this.data.inUse;
187         final byte[] seqToUnseq = this.data.seqToUnseq;
188 
189         int nInUseShadow = 0;
190 
191         for (int i = 0; i < 256; i++) {
192             if (inUse[i]) {
193                 seqToUnseq[nInUseShadow++] = (byte) i;
194             }
195         }
196 
197         this.nInUse = nInUseShadow;
198     }
199 
200     private int read0() throws IOException {
201         switch (currentState) {
202         case EOF:
203             return -1;
204 
205         case START_BLOCK_STATE:
206             return setupBlock();
207 
208         case RAND_PART_A_STATE:
209             throw new IllegalStateException();
210 
211         case RAND_PART_B_STATE:
212             return setupRandPartB();
213 
214         case RAND_PART_C_STATE:
215             return setupRandPartC();
216 
217         case NO_RAND_PART_A_STATE:
218             throw new IllegalStateException();
219 
220         case NO_RAND_PART_B_STATE:
221             return setupNoRandPartB();
222 
223         case NO_RAND_PART_C_STATE:
224             return setupNoRandPartC();
225 
226         default:
227             throw new IllegalStateException();
228         }
229     }
230 
231     private int readNextByte(BitInputStream in) throws IOException {
232         long b = in.readBits(8);
233         return (int) b;
234     }
235 
236     private boolean init(final boolean isFirstStream) throws IOException {
237         if (null == bin) {
238             throw new IOException("No InputStream");
239         }
240 
241         if (!isFirstStream) {
242             bin.clearBitCache();
243         }
244 
245         final int magic0 = readNextByte(this.bin);
246         if (magic0 == -1 && !isFirstStream) {
247             return false;
248         }
249         final int magic1 = readNextByte(this.bin);
250         final int magic2 = readNextByte(this.bin);
251 
252         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
253             throw new IOException(isFirstStream
254                     ? "Stream is not in the BZip2 format"
255                     : "Garbage after a valid BZip2 stream");
256         }
257 
258         final int blockSize = readNextByte(this.bin);
259         if ((blockSize < '1') || (blockSize > '9')) {
260             throw new IOException("BZip2 block size is invalid");
261         }
262 
263         this.blockSize100k = blockSize - '0';
264 
265         this.computedCombinedCRC = 0;
266 
267         return true;
268     }
269 
270     private void initBlock() throws IOException {
271         BitInputStream bin = this.bin;
272         char magic0;
273         char magic1;
274         char magic2;
275         char magic3;
276         char magic4;
277         char magic5;
278 
279         while (true) {
280             // Get the block magic bytes.
281             magic0 = bsGetUByte(bin);
282             magic1 = bsGetUByte(bin);
283             magic2 = bsGetUByte(bin);
284             magic3 = bsGetUByte(bin);
285             magic4 = bsGetUByte(bin);
286             magic5 = bsGetUByte(bin);
287 
288             // If isn't end of stream magic, break out of the loop.
289             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
290                     || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
291                 break;
292             }
293 
294             // End of stream was reached. Check the combined CRC and
295             // advance to the next .bz2 stream if decoding concatenated
296             // streams.
297             if (complete()) {
298                 return;
299             }
300         }
301 
302         if (magic0 != 0x31 || // '1'
303             magic1 != 0x41 || // ')'
304             magic2 != 0x59 || // 'Y'
305             magic3 != 0x26 || // '&'
306             magic4 != 0x53 || // 'S'
307             magic5 != 0x59 // 'Y'
308             ) {
309             this.currentState = EOF;
310             throw new IOException("bad block header");
311         }
312         this.storedBlockCRC = bsGetInt(bin);
313         this.blockRandomised = bsR(bin, 1) == 1;
314 
315         /**
316          * Allocate data here instead in constructor, so we do not allocate
317          * it if the input file is empty.
318          */
319         if (this.data == null) {
320             this.data = new Data(this.blockSize100k);
321         }
322 
323         // currBlockNo++;
324         getAndMoveToFrontDecode();
325 
326         this.crc.initialiseCRC();
327         this.currentState = START_BLOCK_STATE;
328     }
329 
330     private void endBlock() throws IOException {
331         this.computedBlockCRC = this.crc.getFinalCRC();
332 
333         // A bad CRC is considered a fatal error.
334         if (this.storedBlockCRC != this.computedBlockCRC) {
335             // make next blocks readable without error
336             // (repair feature, not yet documented, not tested)
337             this.computedCombinedCRC = (this.storedCombinedCRC << 1)
338                 | (this.storedCombinedCRC >>> 31);
339             this.computedCombinedCRC ^= this.storedBlockCRC;
340 
341             throw new IOException("BZip2 CRC error");
342         }
343 
344         this.computedCombinedCRC = (this.computedCombinedCRC << 1)
345             | (this.computedCombinedCRC >>> 31);
346         this.computedCombinedCRC ^= this.computedBlockCRC;
347     }
348 
349     private boolean complete() throws IOException {
350         this.storedCombinedCRC = bsGetInt(bin);
351         this.currentState = EOF;
352         this.data = null;
353 
354         if (this.storedCombinedCRC != this.computedCombinedCRC) {
355             throw new IOException("BZip2 CRC error");
356         }
357 
358         // Look for the next .bz2 stream if decompressing
359         // concatenated files.
360         return !decompressConcatenated || !init(false);
361     }
362 
363     @Override
364     public void close() throws IOException {
365         final BitInputStream inShadow = this.bin;
366         if (inShadow != null) {
367             try {
368                 inShadow.close();
369             } finally {
370                 this.data = null;
371                 this.bin = null;
372             }
373         }
374     }
375 
376     /**
377      * read bits from the input stream
378      * @param n the number of bits to read, must not exceed 32?
379      * @return
380      * @throws IOException
381      */
382     private static int bsR(BitInputStream bin, final int n) throws IOException {
383         long thech = bin.readBits(n);
384         if (thech < 0) {
385             throw new IOException("unexpected end of stream");
386         }
387         return (int) thech;
388     }
389 
390     private static boolean bsGetBit(BitInputStream bin) throws IOException {
391         return bsR(bin, 1) != 0;
392     }
393 
394     private static char bsGetUByte(BitInputStream bin) throws IOException {
395         return (char) bsR(bin, 8);
396     }
397 
398     private static int bsGetInt(BitInputStream bin) throws IOException {
399         return bsR(bin, 32);
400     }
401 
402     /**
403      * Called by createHuffmanDecodingTables() exclusively.
404      */
405     private static void hbCreateDecodeTables(final int[] limit,
406                                              final int[] base, final int[] perm, final char[] length,
407                                              final int minLen, final int maxLen, final int alphaSize) {
408         for (int i = minLen, pp = 0; i <= maxLen; i++) {
409             for (int j = 0; j < alphaSize; j++) {
410                 if (length[j] == i) {
411                     perm[pp++] = j;
412                 }
413             }
414         }
415 
416         for (int i = MAX_CODE_LEN; --i > 0;) {
417             base[i] = 0;
418             limit[i] = 0;
419         }
420 
421         for (int i = 0; i < alphaSize; i++) {
422             base[length[i] + 1]++;
423         }
424 
425         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
426             b += base[i];
427             base[i] = b;
428         }
429 
430         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
431             final int nb = base[i + 1];
432             vec += nb - b;
433             b = nb;
434             limit[i] = vec - 1;
435             vec <<= 1;
436         }
437 
438         for (int i = minLen + 1; i <= maxLen; i++) {
439             base[i] = ((limit[i - 1] + 1) << 1) - base[i];
440         }
441     }
442 
443     private void recvDecodingTables() throws IOException {
444         final BitInputStream bin = this.bin;
445         final Data dataShadow = this.data;
446         final boolean[] inUse = dataShadow.inUse;
447         final byte[] pos = dataShadow.recvDecodingTables_pos;
448         final byte[] selector = dataShadow.selector;
449         final byte[] selectorMtf = dataShadow.selectorMtf;
450 
451         int inUse16 = 0;
452 
453         /* Receive the mapping table */
454         for (int i = 0; i < 16; i++) {
455             if (bsGetBit(bin)) {
456                 inUse16 |= 1 << i;
457             }
458         }
459 
460         Arrays.fill(inUse, false);
461         for (int i = 0; i < 16; i++) {
462             if ((inUse16 & (1 << i)) != 0) {
463                 final int i16 = i << 4;
464                 for (int j = 0; j < 16; j++) {
465                     if (bsGetBit(bin)) {
466                         inUse[i16 + j] = true;
467                     }
468                 }
469             }
470         }
471 
472         makeMaps();
473         final int alphaSize = this.nInUse + 2;
474         /* Now the selectors */
475         final int nGroups = bsR(bin, 3);
476         final int nSelectors = bsR(bin, 15);
477 
478         for (int i = 0; i < nSelectors; i++) {
479             int j = 0;
480             while (bsGetBit(bin)) {
481                 j++;
482             }
483             selectorMtf[i] = (byte) j;
484         }
485 
486         /* Undo the MTF values for the selectors. */
487         for (int v = nGroups; --v >= 0;) {
488             pos[v] = (byte) v;
489         }
490 
491         for (int i = 0; i < nSelectors; i++) {
492             int v = selectorMtf[i] & 0xff;
493             final byte tmp = pos[v];
494             while (v > 0) {
495                 // nearly all times v is zero, 4 in most other cases
496                 pos[v] = pos[v - 1];
497                 v--;
498             }
499             pos[0] = tmp;
500             selector[i] = tmp;
501         }
502 
503         final char[][] len = dataShadow.temp_charArray2d;
504 
505         /* Now the coding tables */
506         for (int t = 0; t < nGroups; t++) {
507             int curr = bsR(bin, 5);
508             final char[] len_t = len[t];
509             for (int i = 0; i < alphaSize; i++) {
510                 while (bsGetBit(bin)) {
511                     curr += bsGetBit(bin) ? -1 : 1;
512                 }
513                 len_t[i] = (char) curr;
514             }
515         }
516 
517         // finally create the Huffman tables
518         createHuffmanDecodingTables(alphaSize, nGroups);
519     }
520 
521     /**
522      * Called by recvDecodingTables() exclusively.
523      */
524     private void createHuffmanDecodingTables(final int alphaSize,
525                                              final int nGroups) {
526         final Data dataShadow = this.data;
527         final char[][] len = dataShadow.temp_charArray2d;
528         final int[] minLens = dataShadow.minLens;
529         final int[][] limit = dataShadow.limit;
530         final int[][] base = dataShadow.base;
531         final int[][] perm = dataShadow.perm;
532 
533         for (int t = 0; t < nGroups; t++) {
534             int minLen = 32;
535             int maxLen = 0;
536             final char[] len_t = len[t];
537             for (int i = alphaSize; --i >= 0;) {
538                 final char lent = len_t[i];
539                 if (lent > maxLen) {
540                     maxLen = lent;
541                 }
542                 if (lent < minLen) {
543                     minLen = lent;
544                 }
545             }
546             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
547                                  maxLen, alphaSize);
548             minLens[t] = minLen;
549         }
550     }
551 
552     private void getAndMoveToFrontDecode() throws IOException {
553         final BitInputStream bin = this.bin;
554         this.origPtr = bsR(bin, 24);
555         recvDecodingTables();
556 
557         final Data dataShadow = this.data;
558         final byte[] ll8 = dataShadow.ll8;
559         final int[] unzftab = dataShadow.unzftab;
560         final byte[] selector = dataShadow.selector;
561         final byte[] seqToUnseq = dataShadow.seqToUnseq;
562         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
563         final int[] minLens = dataShadow.minLens;
564         final int[][] limit = dataShadow.limit;
565         final int[][] base = dataShadow.base;
566         final int[][] perm = dataShadow.perm;
567         final int limitLast = this.blockSize100k * 100000;
568 
569         /*
570          * Setting up the unzftab entries here is not strictly necessary, but it
571          * does save having to do it later in a separate pass, and so saves a
572          * block's worth of cache misses.
573          */
574         for (int i = 256; --i >= 0;) {
575             yy[i] = (char) i;
576             unzftab[i] = 0;
577         }
578 
579         int groupNo = 0;
580         int groupPos = G_SIZE - 1;
581         final int eob = this.nInUse + 1;
582         int nextSym = getAndMoveToFrontDecode0(0);
583         int lastShadow = -1;
584         int zt = selector[groupNo] & 0xff;
585         int[] base_zt = base[zt];
586         int[] limit_zt = limit[zt];
587         int[] perm_zt = perm[zt];
588         int minLens_zt = minLens[zt];
589 
590         while (nextSym != eob) {
591             if ((nextSym == RUNA) || (nextSym == RUNB)) {
592                 int s = -1;
593 
594                 for (int n = 1; true; n <<= 1) {
595                     if (nextSym == RUNA) {
596                         s += n;
597                     } else if (nextSym == RUNB) {
598                         s += n << 1;
599                     } else {
600                         break;
601                     }
602 
603                     if (groupPos == 0) {
604                         groupPos = G_SIZE - 1;
605                         zt = selector[++groupNo] & 0xff;
606                         base_zt = base[zt];
607                         limit_zt = limit[zt];
608                         perm_zt = perm[zt];
609                         minLens_zt = minLens[zt];
610                     } else {
611                         groupPos--;
612                     }
613 
614                     int zn = minLens_zt;
615                     int zvec = bsR(bin, zn);
616                     while(zvec > limit_zt[zn]) {
617                         zn++;
618                         zvec = (zvec << 1) | bsR(bin, 1);
619                     }
620                     nextSym = perm_zt[zvec - base_zt[zn]];
621                 }
622 
623                 final byte ch = seqToUnseq[yy[0]];
624                 unzftab[ch & 0xff] += s + 1;
625 
626                 while (s-- >= 0) {
627                     ll8[++lastShadow] = ch;
628                 }
629 
630                 if (lastShadow >= limitLast) {
631                     throw new IOException("block overrun");
632                 }
633             } else {
634                 if (++lastShadow >= limitLast) {
635                     throw new IOException("block overrun");
636                 }
637 
638                 final char tmp = yy[nextSym - 1];
639                 unzftab[seqToUnseq[tmp] & 0xff]++;
640                 ll8[lastShadow] = seqToUnseq[tmp];
641 
642                 /*
643                  * This loop is hammered during decompression, hence avoid
644                  * native method call overhead of System.arraycopy for very
645                  * small ranges to copy.
646                  */
647                 if (nextSym <= 16) {
648                     for (int j = nextSym - 1; j > 0;) {
649                         yy[j] = yy[--j];
650                     }
651                 } else {
652                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
653                 }
654 
655                 yy[0] = tmp;
656 
657                 if (groupPos == 0) {
658                     groupPos = G_SIZE - 1;
659                     zt = selector[++groupNo] & 0xff;
660                     base_zt = base[zt];
661                     limit_zt = limit[zt];
662                     perm_zt = perm[zt];
663                     minLens_zt = minLens[zt];
664                 } else {
665                     groupPos--;
666                 }
667 
668                 int zn = minLens_zt;
669                 int zvec = bsR(bin, zn);
670                 while(zvec > limit_zt[zn]) {
671                     zn++;
672                     zvec = (zvec << 1) | bsR(bin, 1);
673                 }
674                 nextSym = perm_zt[zvec - base_zt[zn]];
675             }
676         }
677 
678         this.last = lastShadow;
679     }
680 
681     private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
682         final Data dataShadow = this.data;
683         final int zt = dataShadow.selector[groupNo] & 0xff;
684         final int[] limit_zt = dataShadow.limit[zt];
685         int zn = dataShadow.minLens[zt];
686         int zvec = bsR(bin, zn);
687         while (zvec > limit_zt[zn]) {
688             zn++;
689             zvec = (zvec << 1) | bsR(bin, 1);
690         }
691 
692         return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
693     }
694 
695     private int setupBlock() throws IOException {
696         if (currentState == EOF || this.data == null) {
697             return -1;
698         }
699 
700         final int[] cftab = this.data.cftab;
701         final int[] tt = this.data.initTT(this.last + 1);
702         final byte[] ll8 = this.data.ll8;
703         cftab[0] = 0;
704         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
705 
706         for (int i = 1, c = cftab[0]; i <= 256; i++) {
707             c += cftab[i];
708             cftab[i] = c;
709         }
710 
711         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
712             tt[cftab[ll8[i] & 0xff]++] = i;
713         }
714 
715         if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
716             throw new IOException("stream corrupted");
717         }
718 
719         this.su_tPos = tt[this.origPtr];
720         this.su_count = 0;
721         this.su_i2 = 0;
722         this.su_ch2 = 256; /* not a char and not EOF */
723 
724         if (this.blockRandomised) {
725             this.su_rNToGo = 0;
726             this.su_rTPos = 0;
727             return setupRandPartA();
728         }
729         return setupNoRandPartA();
730     }
731 
732     private int setupRandPartA() throws IOException {
733         if (this.su_i2 <= this.last) {
734             this.su_chPrev = this.su_ch2;
735             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
736             this.su_tPos = this.data.tt[this.su_tPos];
737             if (this.su_rNToGo == 0) {
738                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
739                 if (++this.su_rTPos == 512) {
740                     this.su_rTPos = 0;
741                 }
742             } else {
743                 this.su_rNToGo--;
744             }
745             this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
746             this.su_i2++;
747             this.currentState = RAND_PART_B_STATE;
748             this.crc.updateCRC(su_ch2Shadow);
749             return su_ch2Shadow;
750         }
751         endBlock();
752         initBlock();
753         return setupBlock();
754     }
755 
756     private int setupNoRandPartA() throws IOException {
757         if (this.su_i2 <= this.last) {
758             this.su_chPrev = this.su_ch2;
759             final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
760             this.su_ch2 = su_ch2Shadow;
761             this.su_tPos = this.data.tt[this.su_tPos];
762             this.su_i2++;
763             this.currentState = NO_RAND_PART_B_STATE;
764             this.crc.updateCRC(su_ch2Shadow);
765             return su_ch2Shadow;
766         }
767         this.currentState = NO_RAND_PART_A_STATE;
768         endBlock();
769         initBlock();
770         return setupBlock();
771     }
772 
773     private int setupRandPartB() throws IOException {
774         if (this.su_ch2 != this.su_chPrev) {
775             this.currentState = RAND_PART_A_STATE;
776             this.su_count = 1;
777             return setupRandPartA();
778         } else if (++this.su_count >= 4) {
779             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
780             this.su_tPos = this.data.tt[this.su_tPos];
781             if (this.su_rNToGo == 0) {
782                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
783                 if (++this.su_rTPos == 512) {
784                     this.su_rTPos = 0;
785                 }
786             } else {
787                 this.su_rNToGo--;
788             }
789             this.su_j2 = 0;
790             this.currentState = RAND_PART_C_STATE;
791             if (this.su_rNToGo == 1) {
792                 this.su_z ^= 1;
793             }
794             return setupRandPartC();
795         } else {
796             this.currentState = RAND_PART_A_STATE;
797             return setupRandPartA();
798         }
799     }
800 
801     private int setupRandPartC() throws IOException {
802         if (this.su_j2 < this.su_z) {
803             this.crc.updateCRC(this.su_ch2);
804             this.su_j2++;
805             return this.su_ch2;
806         }
807         this.currentState = RAND_PART_A_STATE;
808         this.su_i2++;
809         this.su_count = 0;
810         return setupRandPartA();
811     }
812 
813     private int setupNoRandPartB() throws IOException {
814         if (this.su_ch2 != this.su_chPrev) {
815             this.su_count = 1;
816             return setupNoRandPartA();
817         } else if (++this.su_count >= 4) {
818             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
819             this.su_tPos = this.data.tt[this.su_tPos];
820             this.su_j2 = 0;
821             return setupNoRandPartC();
822         } else {
823             return setupNoRandPartA();
824         }
825     }
826 
827     private int setupNoRandPartC() throws IOException {
828         if (this.su_j2 < this.su_z) {
829             final int su_ch2Shadow = this.su_ch2;
830             this.crc.updateCRC(su_ch2Shadow);
831             this.su_j2++;
832             this.currentState = NO_RAND_PART_C_STATE;
833             return su_ch2Shadow;
834         }
835         this.su_i2++;
836         this.su_count = 0;
837         return setupNoRandPartA();
838     }
839 
840     private static final class Data {
841 
842         // (with blockSize 900k)
843         final boolean[] inUse = new boolean[256]; // 256 byte
844 
845         final byte[] seqToUnseq = new byte[256]; // 256 byte
846         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
847         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
848 
849         /**
850          * Freq table collected to save a pass over the data during
851          * decompression.
852          */
853         final int[] unzftab = new int[256]; // 1024 byte
854 
855         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
856         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
857         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
858         final int[] minLens = new int[N_GROUPS]; // 24 byte
859 
860         final int[] cftab = new int[257]; // 1028 byte
861         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
862         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
863         // byte
864         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
865         // ---------------
866         // 60798 byte
867 
868         int[] tt; // 3600000 byte
869         byte[] ll8; // 900000 byte
870 
871         // ---------------
872         // 4560782 byte
873         // ===============
874 
875         Data(final int blockSize100k) {
876             this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
877         }
878 
879         /**
880          * Initializes the {@link #tt} array.
881          * 
882          * This method is called when the required length of the array is known.
883          * I don't initialize it at construction time to avoid unneccessary
884          * memory allocation when compressing small files.
885          */
886         int[] initTT(final int length) {
887             int[] ttShadow = this.tt;
888 
889             // tt.length should always be >= length, but theoretically
890             // it can happen, if the compressor mixed small and large
891             // blocks. Normally only the last block will be smaller
892             // than others.
893             if ((ttShadow == null) || (ttShadow.length < length)) {
894                 this.tt = ttShadow = new int[length];
895             }
896 
897             return ttShadow;
898         }
899 
900     }
901 
902     /**
903      * Checks if the signature matches what is expected for a bzip2 file.
904      * 
905      * @param signature
906      *            the bytes to check
907      * @param length
908      *            the number of bytes to check
909      * @return true, if this stream is a bzip2 compressed stream, false otherwise
910      * 
911      * @since 1.1
912      */
913     public static boolean matches(final byte[] signature, final int length) {
914 
915         if (length < 3) {
916             return false;
917         }
918 
919         if (signature[0] != 'B') {
920             return false;
921         }
922 
923         if (signature[1] != 'Z') {
924             return false;
925         }
926 
927         if (signature[2] != 'h') {
928             return false;
929         }
930 
931         return true;
932     }
933 }