View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  /*
21   * This package is based on the work done by Keiron Liddle, Aftex Software
22   * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
23   * great code.
24   */
25  package org.apache.commons.compress.compressors.bzip2;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  
30  import org.apache.commons.compress.compressors.CompressorInputStream;
31  
32  /**
33   * An input stream that decompresses from the BZip2 format to be read as any other stream.
34   * 
35   * @NotThreadSafe
36   */
37  public class BZip2CompressorInputStream extends CompressorInputStream implements
38                                                                            BZip2Constants {
39  
40      /**
41       * Index of the last char in the block, so the block size == last + 1.
42       */
43      private int last;
44  
45      /**
46       * Index in zptr[] of original string after sorting.
47       */
48      private int origPtr;
49  
50      /**
51       * always: in the range 0 .. 9. The current block size is 100000 * this
52       * number.
53       */
54      private int blockSize100k;
55  
56      private boolean blockRandomised;
57  
58      private int bsBuff;
59      private int bsLive;
60      private final CRC crc = new CRC();
61  
62      private int nInUse;
63  
64      private InputStream in;
65      private final boolean decompressConcatenated;
66  
67      private static final int EOF = 0;
68      private static final int START_BLOCK_STATE = 1;
69      private static final int RAND_PART_A_STATE = 2;
70      private static final int RAND_PART_B_STATE = 3;
71      private static final int RAND_PART_C_STATE = 4;
72      private static final int NO_RAND_PART_A_STATE = 5;
73      private static final int NO_RAND_PART_B_STATE = 6;
74      private static final int NO_RAND_PART_C_STATE = 7;
75  
76      private int currentState = START_BLOCK_STATE;
77  
78      private int storedBlockCRC, storedCombinedCRC;
79      private int computedBlockCRC, computedCombinedCRC;
80  
81      // Variables used by setup* methods exclusively
82  
83      private int su_count;
84      private int su_ch2;
85      private int su_chPrev;
86      private int su_i2;
87      private int su_j2;
88      private int su_rNToGo;
89      private int su_rTPos;
90      private int su_tPos;
91      private char su_z;
92  
93      /**
94       * All memory intensive stuff. This field is initialized by initBlock().
95       */
96      private BZip2CompressorInputStream.Data data;
97  
98      /**
99       * Constructs a new BZip2CompressorInputStream which decompresses bytes
100      * read from the specified stream. This doesn't suppprt decompressing
101      * concatenated .bz2 files.
102      * 
103      * @param in the InputStream from which this object should be created
104      * @throws IOException
105      *             if the stream content is malformed or an I/O error occurs.
106      * @throws NullPointerException
107      *             if {@code in == null}
108      */
109     public BZip2CompressorInputStream(final InputStream in) throws IOException {
110         this(in, false);
111     }
112 
113     /**
114      * Constructs a new BZip2CompressorInputStream which decompresses bytes
115      * read from the specified stream.
116      *
117      * @param in the InputStream from which this object should be created
118      * @param decompressConcatenated
119      *                     if true, decompress until the end of the input;
120      *                     if false, stop after the first .bz2 stream and
121      *                     leave the input position to point to the next
122      *                     byte after the .bz2 stream
123      *
124      * @throws IOException
125      *             if the stream content is malformed or an I/O error occurs.
126      * @throws NullPointerException
127      *             if {@code in == null}
128      */
129     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
130         this.in = in;
131         this.decompressConcatenated = decompressConcatenated;
132 
133         init(true);
134         initBlock();
135     }
136 
137     @Override
138     public int read() throws IOException {
139         if (this.in != null) {
140             int r = read0();
141             count(r < 0 ? -1 : 1);
142             return r;
143         } else {
144             throw new IOException("stream closed");
145         }
146     }
147 
148     /*
149      * (non-Javadoc)
150      * 
151      * @see java.io.InputStream#read(byte[], int, int)
152      */
153     @Override
154     public int read(final byte[] dest, final int offs, final int len)
155         throws IOException {
156         if (offs < 0) {
157             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
158         }
159         if (len < 0) {
160             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
161         }
162         if (offs + len > dest.length) {
163             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
164                                                 + len + ") > dest.length(" + dest.length + ").");
165         }
166         if (this.in == null) {
167             throw new IOException("stream closed");
168         }
169         if (len == 0) {
170             return 0;
171         }
172 
173         final int hi = offs + len;
174         int destOffs = offs;
175         int b;
176         while (destOffs < hi && ((b = read0()) >= 0)) {
177             dest[destOffs++] = (byte) b;
178             count(1);
179         }
180 
181         int c = (destOffs == offs) ? -1 : (destOffs - offs);
182         return c;
183     }
184 
185     private void makeMaps() {
186         final boolean[] inUse = this.data.inUse;
187         final byte[] seqToUnseq = this.data.seqToUnseq;
188 
189         int nInUseShadow = 0;
190 
191         for (int i = 0; i < 256; i++) {
192             if (inUse[i]) {
193                 seqToUnseq[nInUseShadow++] = (byte) i;
194             }
195         }
196 
197         this.nInUse = nInUseShadow;
198     }
199 
200     private int read0() throws IOException {
201         switch (currentState) {
202         case EOF:
203             return -1;
204 
205         case START_BLOCK_STATE:
206             return setupBlock();
207 
208         case RAND_PART_A_STATE:
209             throw new IllegalStateException();
210 
211         case RAND_PART_B_STATE:
212             return setupRandPartB();
213 
214         case RAND_PART_C_STATE:
215             return setupRandPartC();
216 
217         case NO_RAND_PART_A_STATE:
218             throw new IllegalStateException();
219 
220         case NO_RAND_PART_B_STATE:
221             return setupNoRandPartB();
222 
223         case NO_RAND_PART_C_STATE:
224             return setupNoRandPartC();
225 
226         default:
227             throw new IllegalStateException();
228         }
229     }
230 
231     private boolean init(boolean isFirstStream) throws IOException {
232         if (null == in) {
233             throw new IOException("No InputStream");
234         }
235 
236         int magic0 = this.in.read();
237         if (magic0 == -1 && !isFirstStream) {
238             return false;
239         }
240         int magic1 = this.in.read();
241         int magic2 = this.in.read();
242 
243         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
244             throw new IOException(isFirstStream
245                     ? "Stream is not in the BZip2 format"
246                     : "Garbage after a valid BZip2 stream");
247         }
248 
249         int blockSize = this.in.read();
250         if ((blockSize < '1') || (blockSize > '9')) {
251             throw new IOException("BZip2 block size is invalid");
252         }
253 
254         this.blockSize100k = blockSize - '0';
255 
256         this.bsLive = 0;
257         this.computedCombinedCRC = 0;
258 
259         return true;
260     }
261 
262     private void initBlock() throws IOException {
263         char magic0;
264         char magic1;
265         char magic2;
266         char magic3;
267         char magic4;
268         char magic5;
269 
270         while (true) {
271             // Get the block magic bytes.
272             magic0 = bsGetUByte();
273             magic1 = bsGetUByte();
274             magic2 = bsGetUByte();
275             magic3 = bsGetUByte();
276             magic4 = bsGetUByte();
277             magic5 = bsGetUByte();
278 
279             // If isn't end of stream magic, break out of the loop.
280             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
281                     || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
282                 break;
283             }
284 
285             // End of stream was reached. Check the combined CRC and
286             // advance to the next .bz2 stream if decoding concatenated
287             // streams.
288             if (complete()) {
289                 return;
290             }
291         }
292 
293         if (magic0 != 0x31 || // '1'
294             magic1 != 0x41 || // ')'
295             magic2 != 0x59 || // 'Y'
296             magic3 != 0x26 || // '&'
297             magic4 != 0x53 || // 'S'
298             magic5 != 0x59 // 'Y'
299             ) {
300             this.currentState = EOF;
301             throw new IOException("bad block header");
302         } else {
303             this.storedBlockCRC = bsGetInt();
304             this.blockRandomised = bsR(1) == 1;
305 
306             /**
307              * Allocate data here instead in constructor, so we do not allocate
308              * it if the input file is empty.
309              */
310             if (this.data == null) {
311                 this.data = new Data(this.blockSize100k);
312             }
313 
314             // currBlockNo++;
315             getAndMoveToFrontDecode();
316 
317             this.crc.initialiseCRC();
318             this.currentState = START_BLOCK_STATE;
319         }
320     }
321 
322     private void endBlock() throws IOException {
323         this.computedBlockCRC = this.crc.getFinalCRC();
324 
325         // A bad CRC is considered a fatal error.
326         if (this.storedBlockCRC != this.computedBlockCRC) {
327             // make next blocks readable without error
328             // (repair feature, not yet documented, not tested)
329             this.computedCombinedCRC = (this.storedCombinedCRC << 1)
330                 | (this.storedCombinedCRC >>> 31);
331             this.computedCombinedCRC ^= this.storedBlockCRC;
332 
333             throw new IOException("BZip2 CRC error");
334         }
335 
336         this.computedCombinedCRC = (this.computedCombinedCRC << 1)
337             | (this.computedCombinedCRC >>> 31);
338         this.computedCombinedCRC ^= this.computedBlockCRC;
339     }
340 
341     private boolean complete() throws IOException {
342         this.storedCombinedCRC = bsGetInt();
343         this.currentState = EOF;
344         this.data = null;
345 
346         if (this.storedCombinedCRC != this.computedCombinedCRC) {
347             throw new IOException("BZip2 CRC error");
348         }
349 
350         // Look for the next .bz2 stream if decompressing
351         // concatenated files.
352         return !decompressConcatenated || !init(false);
353     }
354 
355     @Override
356     public void close() throws IOException {
357         InputStream inShadow = this.in;
358         if (inShadow != null) {
359             try {
360                 if (inShadow != System.in) {
361                     inShadow.close();
362                 }
363             } finally {
364                 this.data = null;
365                 this.in = null;
366             }
367         }
368     }
369 
370     private int bsR(final int n) throws IOException {
371         int bsLiveShadow = this.bsLive;
372         int bsBuffShadow = this.bsBuff;
373 
374         if (bsLiveShadow < n) {
375             final InputStream inShadow = this.in;
376             do {
377                 int thech = inShadow.read();
378 
379                 if (thech < 0) {
380                     throw new IOException("unexpected end of stream");
381                 }
382 
383                 bsBuffShadow = (bsBuffShadow << 8) | thech;
384                 bsLiveShadow += 8;
385             } while (bsLiveShadow < n);
386 
387             this.bsBuff = bsBuffShadow;
388         }
389 
390         this.bsLive = bsLiveShadow - n;
391         return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
392     }
393 
394     private boolean bsGetBit() throws IOException {
395         return bsR(1) != 0;
396     }
397 
398     private char bsGetUByte() throws IOException {
399         return (char) bsR(8);
400     }
401 
402     private int bsGetInt() throws IOException {
403         return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
404     }
405 
406     /**
407      * Called by createHuffmanDecodingTables() exclusively.
408      */
409     private static void hbCreateDecodeTables(final int[] limit,
410                                              final int[] base, final int[] perm, final char[] length,
411                                              final int minLen, final int maxLen, final int alphaSize) {
412         for (int i = minLen, pp = 0; i <= maxLen; i++) {
413             for (int j = 0; j < alphaSize; j++) {
414                 if (length[j] == i) {
415                     perm[pp++] = j;
416                 }
417             }
418         }
419 
420         for (int i = MAX_CODE_LEN; --i > 0;) {
421             base[i] = 0;
422             limit[i] = 0;
423         }
424 
425         for (int i = 0; i < alphaSize; i++) {
426             base[length[i] + 1]++;
427         }
428 
429         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
430             b += base[i];
431             base[i] = b;
432         }
433 
434         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
435             final int nb = base[i + 1];
436             vec += nb - b;
437             b = nb;
438             limit[i] = vec - 1;
439             vec <<= 1;
440         }
441 
442         for (int i = minLen + 1; i <= maxLen; i++) {
443             base[i] = ((limit[i - 1] + 1) << 1) - base[i];
444         }
445     }
446 
447     private void recvDecodingTables() throws IOException {
448         final Data dataShadow = this.data;
449         final boolean[] inUse = dataShadow.inUse;
450         final byte[] pos = dataShadow.recvDecodingTables_pos;
451         final byte[] selector = dataShadow.selector;
452         final byte[] selectorMtf = dataShadow.selectorMtf;
453 
454         int inUse16 = 0;
455 
456         /* Receive the mapping table */
457         for (int i = 0; i < 16; i++) {
458             if (bsGetBit()) {
459                 inUse16 |= 1 << i;
460             }
461         }
462 
463         for (int i = 256; --i >= 0;) {
464             inUse[i] = false;
465         }
466 
467         for (int i = 0; i < 16; i++) {
468             if ((inUse16 & (1 << i)) != 0) {
469                 final int i16 = i << 4;
470                 for (int j = 0; j < 16; j++) {
471                     if (bsGetBit()) {
472                         inUse[i16 + j] = true;
473                     }
474                 }
475             }
476         }
477 
478         makeMaps();
479         final int alphaSize = this.nInUse + 2;
480 
481         /* Now the selectors */
482         final int nGroups = bsR(3);
483         final int nSelectors = bsR(15);
484 
485         for (int i = 0; i < nSelectors; i++) {
486             int j = 0;
487             while (bsGetBit()) {
488                 j++;
489             }
490             selectorMtf[i] = (byte) j;
491         }
492 
493         /* Undo the MTF values for the selectors. */
494         for (int v = nGroups; --v >= 0;) {
495             pos[v] = (byte) v;
496         }
497 
498         for (int i = 0; i < nSelectors; i++) {
499             int v = selectorMtf[i] & 0xff;
500             final byte tmp = pos[v];
501             while (v > 0) {
502                 // nearly all times v is zero, 4 in most other cases
503                 pos[v] = pos[v - 1];
504                 v--;
505             }
506             pos[0] = tmp;
507             selector[i] = tmp;
508         }
509 
510         final char[][] len = dataShadow.temp_charArray2d;
511 
512         /* Now the coding tables */
513         for (int t = 0; t < nGroups; t++) {
514             int curr = bsR(5);
515             final char[] len_t = len[t];
516             for (int i = 0; i < alphaSize; i++) {
517                 while (bsGetBit()) {
518                     curr += bsGetBit() ? -1 : 1;
519                 }
520                 len_t[i] = (char) curr;
521             }
522         }
523 
524         // finally create the Huffman tables
525         createHuffmanDecodingTables(alphaSize, nGroups);
526     }
527 
528     /**
529      * Called by recvDecodingTables() exclusively.
530      */
531     private void createHuffmanDecodingTables(final int alphaSize,
532                                              final int nGroups) {
533         final Data dataShadow = this.data;
534         final char[][] len = dataShadow.temp_charArray2d;
535         final int[] minLens = dataShadow.minLens;
536         final int[][] limit = dataShadow.limit;
537         final int[][] base = dataShadow.base;
538         final int[][] perm = dataShadow.perm;
539 
540         for (int t = 0; t < nGroups; t++) {
541             int minLen = 32;
542             int maxLen = 0;
543             final char[] len_t = len[t];
544             for (int i = alphaSize; --i >= 0;) {
545                 final char lent = len_t[i];
546                 if (lent > maxLen) {
547                     maxLen = lent;
548                 }
549                 if (lent < minLen) {
550                     minLen = lent;
551                 }
552             }
553             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
554                                  maxLen, alphaSize);
555             minLens[t] = minLen;
556         }
557     }
558 
559     private void getAndMoveToFrontDecode() throws IOException {
560         this.origPtr = bsR(24);
561         recvDecodingTables();
562 
563         final InputStream inShadow = this.in;
564         final Data dataShadow = this.data;
565         final byte[] ll8 = dataShadow.ll8;
566         final int[] unzftab = dataShadow.unzftab;
567         final byte[] selector = dataShadow.selector;
568         final byte[] seqToUnseq = dataShadow.seqToUnseq;
569         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
570         final int[] minLens = dataShadow.minLens;
571         final int[][] limit = dataShadow.limit;
572         final int[][] base = dataShadow.base;
573         final int[][] perm = dataShadow.perm;
574         final int limitLast = this.blockSize100k * 100000;
575 
576         /*
577          * Setting up the unzftab entries here is not strictly necessary, but it
578          * does save having to do it later in a separate pass, and so saves a
579          * block's worth of cache misses.
580          */
581         for (int i = 256; --i >= 0;) {
582             yy[i] = (char) i;
583             unzftab[i] = 0;
584         }
585 
586         int groupNo = 0;
587         int groupPos = G_SIZE - 1;
588         final int eob = this.nInUse + 1;
589         int nextSym = getAndMoveToFrontDecode0(0);
590         int bsBuffShadow = this.bsBuff;
591         int bsLiveShadow = this.bsLive;
592         int lastShadow = -1;
593         int zt = selector[groupNo] & 0xff;
594         int[] base_zt = base[zt];
595         int[] limit_zt = limit[zt];
596         int[] perm_zt = perm[zt];
597         int minLens_zt = minLens[zt];
598 
599         while (nextSym != eob) {
600             if ((nextSym == RUNA) || (nextSym == RUNB)) {
601                 int s = -1;
602 
603                 for (int n = 1; true; n <<= 1) {
604                     if (nextSym == RUNA) {
605                         s += n;
606                     } else if (nextSym == RUNB) {
607                         s += n << 1;
608                     } else {
609                         break;
610                     }
611 
612                     if (groupPos == 0) {
613                         groupPos = G_SIZE - 1;
614                         zt = selector[++groupNo] & 0xff;
615                         base_zt = base[zt];
616                         limit_zt = limit[zt];
617                         perm_zt = perm[zt];
618                         minLens_zt = minLens[zt];
619                     } else {
620                         groupPos--;
621                     }
622 
623                     int zn = minLens_zt;
624 
625                     // Inlined:
626                     // int zvec = bsR(zn);
627                     while (bsLiveShadow < zn) {
628                         final int thech = inShadow.read();
629                         if (thech >= 0) {
630                             bsBuffShadow = (bsBuffShadow << 8) | thech;
631                             bsLiveShadow += 8;
632                             continue;
633                         } else {
634                             throw new IOException("unexpected end of stream");
635                         }
636                     }
637                     int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
638                         & ((1 << zn) - 1);
639                     bsLiveShadow -= zn;
640 
641                     while (zvec > limit_zt[zn]) {
642                         zn++;
643                         while (bsLiveShadow < 1) {
644                             final int thech = inShadow.read();
645                             if (thech >= 0) {
646                                 bsBuffShadow = (bsBuffShadow << 8) | thech;
647                                 bsLiveShadow += 8;
648                                 continue;
649                             } else {
650                                 throw new IOException(
651                                                       "unexpected end of stream");
652                             }
653                         }
654                         bsLiveShadow--;
655                         zvec = (zvec << 1)
656                             | ((bsBuffShadow >> bsLiveShadow) & 1);
657                     }
658                     nextSym = perm_zt[zvec - base_zt[zn]];
659                 }
660 
661                 final byte ch = seqToUnseq[yy[0]];
662                 unzftab[ch & 0xff] += s + 1;
663 
664                 while (s-- >= 0) {
665                     ll8[++lastShadow] = ch;
666                 }
667 
668                 if (lastShadow >= limitLast) {
669                     throw new IOException("block overrun");
670                 }
671             } else {
672                 if (++lastShadow >= limitLast) {
673                     throw new IOException("block overrun");
674                 }
675 
676                 final char tmp = yy[nextSym - 1];
677                 unzftab[seqToUnseq[tmp] & 0xff]++;
678                 ll8[lastShadow] = seqToUnseq[tmp];
679 
680                 /*
681                  * This loop is hammered during decompression, hence avoid
682                  * native method call overhead of System.arraycopy for very
683                  * small ranges to copy.
684                  */
685                 if (nextSym <= 16) {
686                     for (int j = nextSym - 1; j > 0;) {
687                         yy[j] = yy[--j];
688                     }
689                 } else {
690                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
691                 }
692 
693                 yy[0] = tmp;
694 
695                 if (groupPos == 0) {
696                     groupPos = G_SIZE - 1;
697                     zt = selector[++groupNo] & 0xff;
698                     base_zt = base[zt];
699                     limit_zt = limit[zt];
700                     perm_zt = perm[zt];
701                     minLens_zt = minLens[zt];
702                 } else {
703                     groupPos--;
704                 }
705 
706                 int zn = minLens_zt;
707 
708                 // Inlined:
709                 // int zvec = bsR(zn);
710                 while (bsLiveShadow < zn) {
711                     final int thech = inShadow.read();
712                     if (thech >= 0) {
713                         bsBuffShadow = (bsBuffShadow << 8) | thech;
714                         bsLiveShadow += 8;
715                         continue;
716                     } else {
717                         throw new IOException("unexpected end of stream");
718                     }
719                 }
720                 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
721                     & ((1 << zn) - 1);
722                 bsLiveShadow -= zn;
723 
724                 while (zvec > limit_zt[zn]) {
725                     zn++;
726                     while (bsLiveShadow < 1) {
727                         final int thech = inShadow.read();
728                         if (thech >= 0) {
729                             bsBuffShadow = (bsBuffShadow << 8) | thech;
730                             bsLiveShadow += 8;
731                             continue;
732                         } else {
733                             throw new IOException("unexpected end of stream");
734                         }
735                     }
736                     bsLiveShadow--;
737                     zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
738                 }
739                 nextSym = perm_zt[zvec - base_zt[zn]];
740             }
741         }
742 
743         this.last = lastShadow;
744         this.bsLive = bsLiveShadow;
745         this.bsBuff = bsBuffShadow;
746     }
747 
748     private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
749         final InputStream inShadow = this.in;
750         final Data dataShadow = this.data;
751         final int zt = dataShadow.selector[groupNo] & 0xff;
752         final int[] limit_zt = dataShadow.limit[zt];
753         int zn = dataShadow.minLens[zt];
754         int zvec = bsR(zn);
755         int bsLiveShadow = this.bsLive;
756         int bsBuffShadow = this.bsBuff;
757 
758         while (zvec > limit_zt[zn]) {
759             zn++;
760             while (bsLiveShadow < 1) {
761                 final int thech = inShadow.read();
762 
763                 if (thech >= 0) {
764                     bsBuffShadow = (bsBuffShadow << 8) | thech;
765                     bsLiveShadow += 8;
766                     continue;
767                 } else {
768                     throw new IOException("unexpected end of stream");
769                 }
770             }
771             bsLiveShadow--;
772             zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
773         }
774 
775         this.bsLive = bsLiveShadow;
776         this.bsBuff = bsBuffShadow;
777 
778         return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
779     }
780 
781     private int setupBlock() throws IOException {
782         if (currentState == EOF || this.data == null) {
783             return -1;
784         }
785 
786         final int[] cftab = this.data.cftab;
787         final int[] tt = this.data.initTT(this.last + 1);
788         final byte[] ll8 = this.data.ll8;
789         cftab[0] = 0;
790         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
791 
792         for (int i = 1, c = cftab[0]; i <= 256; i++) {
793             c += cftab[i];
794             cftab[i] = c;
795         }
796 
797         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
798             tt[cftab[ll8[i] & 0xff]++] = i;
799         }
800 
801         if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
802             throw new IOException("stream corrupted");
803         }
804 
805         this.su_tPos = tt[this.origPtr];
806         this.su_count = 0;
807         this.su_i2 = 0;
808         this.su_ch2 = 256; /* not a char and not EOF */
809 
810         if (this.blockRandomised) {
811             this.su_rNToGo = 0;
812             this.su_rTPos = 0;
813             return setupRandPartA();
814         }
815         return setupNoRandPartA();
816     }
817 
818     private int setupRandPartA() throws IOException {
819         if (this.su_i2 <= this.last) {
820             this.su_chPrev = this.su_ch2;
821             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
822             this.su_tPos = this.data.tt[this.su_tPos];
823             if (this.su_rNToGo == 0) {
824                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
825                 if (++this.su_rTPos == 512) {
826                     this.su_rTPos = 0;
827                 }
828             } else {
829                 this.su_rNToGo--;
830             }
831             this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
832             this.su_i2++;
833             this.currentState = RAND_PART_B_STATE;
834             this.crc.updateCRC(su_ch2Shadow);
835             return su_ch2Shadow;
836         } else {
837             endBlock();
838             initBlock();
839             return setupBlock();
840         }
841     }
842 
843     private int setupNoRandPartA() throws IOException {
844         if (this.su_i2 <= this.last) {
845             this.su_chPrev = this.su_ch2;
846             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
847             this.su_ch2 = su_ch2Shadow;
848             this.su_tPos = this.data.tt[this.su_tPos];
849             this.su_i2++;
850             this.currentState = NO_RAND_PART_B_STATE;
851             this.crc.updateCRC(su_ch2Shadow);
852             return su_ch2Shadow;
853         } else {
854             this.currentState = NO_RAND_PART_A_STATE;
855             endBlock();
856             initBlock();
857             return setupBlock();
858         }
859     }
860 
861     private int setupRandPartB() throws IOException {
862         if (this.su_ch2 != this.su_chPrev) {
863             this.currentState = RAND_PART_A_STATE;
864             this.su_count = 1;
865             return setupRandPartA();
866         } else if (++this.su_count >= 4) {
867             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
868             this.su_tPos = this.data.tt[this.su_tPos];
869             if (this.su_rNToGo == 0) {
870                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
871                 if (++this.su_rTPos == 512) {
872                     this.su_rTPos = 0;
873                 }
874             } else {
875                 this.su_rNToGo--;
876             }
877             this.su_j2 = 0;
878             this.currentState = RAND_PART_C_STATE;
879             if (this.su_rNToGo == 1) {
880                 this.su_z ^= 1;
881             }
882             return setupRandPartC();
883         } else {
884             this.currentState = RAND_PART_A_STATE;
885             return setupRandPartA();
886         }
887     }
888 
889     private int setupRandPartC() throws IOException {
890         if (this.su_j2 < this.su_z) {
891             this.crc.updateCRC(this.su_ch2);
892             this.su_j2++;
893             return this.su_ch2;
894         } else {
895             this.currentState = RAND_PART_A_STATE;
896             this.su_i2++;
897             this.su_count = 0;
898             return setupRandPartA();
899         }
900     }
901 
902     private int setupNoRandPartB() throws IOException {
903         if (this.su_ch2 != this.su_chPrev) {
904             this.su_count = 1;
905             return setupNoRandPartA();
906         } else if (++this.su_count >= 4) {
907             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
908             this.su_tPos = this.data.tt[this.su_tPos];
909             this.su_j2 = 0;
910             return setupNoRandPartC();
911         } else {
912             return setupNoRandPartA();
913         }
914     }
915 
916     private int setupNoRandPartC() throws IOException {
917         if (this.su_j2 < this.su_z) {
918             int su_ch2Shadow = this.su_ch2;
919             this.crc.updateCRC(su_ch2Shadow);
920             this.su_j2++;
921             this.currentState = NO_RAND_PART_C_STATE;
922             return su_ch2Shadow;
923         } else {
924             this.su_i2++;
925             this.su_count = 0;
926             return setupNoRandPartA();
927         }
928     }
929 
930     private static final class Data {
931 
932         // (with blockSize 900k)
933         final boolean[] inUse = new boolean[256]; // 256 byte
934 
935         final byte[] seqToUnseq = new byte[256]; // 256 byte
936         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
937         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
938 
939         /**
940          * Freq table collected to save a pass over the data during
941          * decompression.
942          */
943         final int[] unzftab = new int[256]; // 1024 byte
944 
945         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
946         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
947         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
948         final int[] minLens = new int[N_GROUPS]; // 24 byte
949 
950         final int[] cftab = new int[257]; // 1028 byte
951         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
952         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
953         // byte
954         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
955         // ---------------
956         // 60798 byte
957 
958         int[] tt; // 3600000 byte
959         byte[] ll8; // 900000 byte
960 
961         // ---------------
962         // 4560782 byte
963         // ===============
964 
965         Data(int blockSize100k) {
966             this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
967         }
968 
969         /**
970          * Initializes the {@link #tt} array.
971          * 
972          * This method is called when the required length of the array is known.
973          * I don't initialize it at construction time to avoid unneccessary
974          * memory allocation when compressing small files.
975          */
976         int[] initTT(int length) {
977             int[] ttShadow = this.tt;
978 
979             // tt.length should always be >= length, but theoretically
980             // it can happen, if the compressor mixed small and large
981             // blocks. Normally only the last block will be smaller
982             // than others.
983             if ((ttShadow == null) || (ttShadow.length < length)) {
984                 this.tt = ttShadow = new int[length];
985             }
986 
987             return ttShadow;
988         }
989 
990     }
991 
992     /**
993      * Checks if the signature matches what is expected for a bzip2 file.
994      * 
995      * @param signature
996      *            the bytes to check
997      * @param length
998      *            the number of bytes to check
999      * @return true, if this stream is a bzip2 compressed stream, false otherwise
1000      * 
1001      * @since 1.1
1002      */
1003     public static boolean matches(byte[] signature, int length) {
1004 
1005         if (length < 3) {
1006             return false;
1007         }
1008 
1009         if (signature[0] != 'B') {
1010             return false;
1011         }
1012 
1013         if (signature[1] != 'Z') {
1014             return false;
1015         }
1016 
1017         if (signature[2] != 'h') {
1018             return false;
1019         }
1020 
1021         return true;
1022     }
1023 }