View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  /*
21   * This package is based on the work done by Keiron Liddle, Aftex Software
22   * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
23   * great code.
24   */
25  package org.apache.commons.compress.compressors.bzip2;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  
30  import org.apache.commons.compress.compressors.CompressorInputStream;
31  
32  /**
33   * An input stream that decompresses from the BZip2 format to be read as any other stream.
34   * 
35   * @NotThreadSafe
36   */
37  public class BZip2CompressorInputStream extends CompressorInputStream implements
38                                                                            BZip2Constants {
39  
40      /**
41       * Index of the last char in the block, so the block size == last + 1.
42       */
43      private int last;
44  
45      /**
46       * Index in zptr[] of original string after sorting.
47       */
48      private int origPtr;
49  
50      /**
51       * always: in the range 0 .. 9. The current block size is 100000 * this
52       * number.
53       */
54      private int blockSize100k;
55  
56      private boolean blockRandomised;
57  
58      private int bsBuff;
59      private int bsLive;
60      private final CRC crc = new CRC();
61  
62      private int nInUse;
63  
64      private InputStream in;
65      private final boolean decompressConcatenated;
66  
67      private static final int EOF = 0;
68      private static final int START_BLOCK_STATE = 1;
69      private static final int RAND_PART_A_STATE = 2;
70      private static final int RAND_PART_B_STATE = 3;
71      private static final int RAND_PART_C_STATE = 4;
72      private static final int NO_RAND_PART_A_STATE = 5;
73      private static final int NO_RAND_PART_B_STATE = 6;
74      private static final int NO_RAND_PART_C_STATE = 7;
75  
76      private int currentState = START_BLOCK_STATE;
77  
78      private int storedBlockCRC, storedCombinedCRC;
79      private int computedBlockCRC, computedCombinedCRC;
80  
81      // Variables used by setup* methods exclusively
82  
83      private int su_count;
84      private int su_ch2;
85      private int su_chPrev;
86      private int su_i2;
87      private int su_j2;
88      private int su_rNToGo;
89      private int su_rTPos;
90      private int su_tPos;
91      private char su_z;
92  
93      /**
94       * All memory intensive stuff. This field is initialized by initBlock().
95       */
96      private BZip2CompressorInputStream.Data data;
97  
98      /**
99       * Constructs a new BZip2CompressorInputStream which decompresses bytes
100      * read from the specified stream. This doesn't suppprt decompressing
101      * concatenated .bz2 files.
102      * 
103      * @throws IOException
104      *             if the stream content is malformed or an I/O error occurs.
105      * @throws NullPointerException
106      *             if {@code in == null}
107      */
108     public BZip2CompressorInputStream(final InputStream in) throws IOException {
109         this(in, false);
110     }
111 
112     /**
113      * Constructs a new BZip2CompressorInputStream which decompresses bytes
114      * read from the specified stream.
115      *
116      * @param in the InputStream from which this object should be created
117      * @param decompressConcatenated
118      *                     if true, decompress until the end of the input;
119      *                     if false, stop after the first .bz2 stream and
120      *                     leave the input position to point to the next
121      *                     byte after the .bz2 stream
122      *
123      * @throws IOException
124      *             if the stream content is malformed or an I/O error occurs.
125      * @throws NullPointerException
126      *             if {@code in == null}
127      */
128     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
129         this.in = in;
130         this.decompressConcatenated = decompressConcatenated;
131 
132         init(true);
133         initBlock();
134     }
135 
136     @Override
137     public int read() throws IOException {
138         if (this.in != null) {
139             int r = read0();
140             count(r < 0 ? -1 : 1);
141             return r;
142         } else {
143             throw new IOException("stream closed");
144         }
145     }
146 
147     /*
148      * (non-Javadoc)
149      * 
150      * @see java.io.InputStream#read(byte[], int, int)
151      */
152     @Override
153     public int read(final byte[] dest, final int offs, final int len)
154         throws IOException {
155         if (offs < 0) {
156             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
157         }
158         if (len < 0) {
159             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
160         }
161         if (offs + len > dest.length) {
162             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
163                                                 + len + ") > dest.length(" + dest.length + ").");
164         }
165         if (this.in == null) {
166             throw new IOException("stream closed");
167         }
168 
169         final int hi = offs + len;
170         int destOffs = offs;
171         int b;
172         while (destOffs < hi && ((b = read0()) >= 0)) {
173             dest[destOffs++] = (byte) b;
174             count(1);
175         }
176 
177         int c = (destOffs == offs) ? -1 : (destOffs - offs);
178         return c;
179     }
180 
181     private void makeMaps() {
182         final boolean[] inUse = this.data.inUse;
183         final byte[] seqToUnseq = this.data.seqToUnseq;
184 
185         int nInUseShadow = 0;
186 
187         for (int i = 0; i < 256; i++) {
188             if (inUse[i]) {
189                 seqToUnseq[nInUseShadow++] = (byte) i;
190             }
191         }
192 
193         this.nInUse = nInUseShadow;
194     }
195 
196     private int read0() throws IOException {
197         switch (currentState) {
198         case EOF:
199             return -1;
200 
201         case START_BLOCK_STATE:
202             return setupBlock();
203 
204         case RAND_PART_A_STATE:
205             throw new IllegalStateException();
206 
207         case RAND_PART_B_STATE:
208             return setupRandPartB();
209 
210         case RAND_PART_C_STATE:
211             return setupRandPartC();
212 
213         case NO_RAND_PART_A_STATE:
214             throw new IllegalStateException();
215 
216         case NO_RAND_PART_B_STATE:
217             return setupNoRandPartB();
218 
219         case NO_RAND_PART_C_STATE:
220             return setupNoRandPartC();
221 
222         default:
223             throw new IllegalStateException();
224         }
225     }
226 
227     private boolean init(boolean isFirstStream) throws IOException {
228         if (null == in) {
229             throw new IOException("No InputStream");
230         }
231 
232         int magic0 = this.in.read();
233         if (magic0 == -1 && !isFirstStream) {
234             return false;
235         }
236         int magic1 = this.in.read();
237         int magic2 = this.in.read();
238 
239         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
240             throw new IOException(isFirstStream
241                     ? "Stream is not in the BZip2 format"
242                     : "Garbage after a valid BZip2 stream");
243         }
244 
245         int blockSize = this.in.read();
246         if ((blockSize < '1') || (blockSize > '9')) {
247             throw new IOException("BZip2 block size is invalid");
248         }
249 
250         this.blockSize100k = blockSize - '0';
251 
252         this.bsLive = 0;
253         this.computedCombinedCRC = 0;
254 
255         return true;
256     }
257 
258     private void initBlock() throws IOException {
259         char magic0;
260         char magic1;
261         char magic2;
262         char magic3;
263         char magic4;
264         char magic5;
265 
266         while (true) {
267             // Get the block magic bytes.
268             magic0 = bsGetUByte();
269             magic1 = bsGetUByte();
270             magic2 = bsGetUByte();
271             magic3 = bsGetUByte();
272             magic4 = bsGetUByte();
273             magic5 = bsGetUByte();
274 
275             // If isn't end of stream magic, break out of the loop.
276             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
277                     || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
278                 break;
279             }
280 
281             // End of stream was reached. Check the combined CRC and
282             // advance to the next .bz2 stream if decoding concatenated
283             // streams.
284             if (complete()) {
285                 return;
286             }
287         }
288 
289         if (magic0 != 0x31 || // '1'
290             magic1 != 0x41 || // ')'
291             magic2 != 0x59 || // 'Y'
292             magic3 != 0x26 || // '&'
293             magic4 != 0x53 || // 'S'
294             magic5 != 0x59 // 'Y'
295             ) {
296             this.currentState = EOF;
297             throw new IOException("bad block header");
298         } else {
299             this.storedBlockCRC = bsGetInt();
300             this.blockRandomised = bsR(1) == 1;
301 
302             /**
303              * Allocate data here instead in constructor, so we do not allocate
304              * it if the input file is empty.
305              */
306             if (this.data == null) {
307                 this.data = new Data(this.blockSize100k);
308             }
309 
310             // currBlockNo++;
311             getAndMoveToFrontDecode();
312 
313             this.crc.initialiseCRC();
314             this.currentState = START_BLOCK_STATE;
315         }
316     }
317 
318     private void endBlock() throws IOException {
319         this.computedBlockCRC = this.crc.getFinalCRC();
320 
321         // A bad CRC is considered a fatal error.
322         if (this.storedBlockCRC != this.computedBlockCRC) {
323             // make next blocks readable without error
324             // (repair feature, not yet documented, not tested)
325             this.computedCombinedCRC = (this.storedCombinedCRC << 1)
326                 | (this.storedCombinedCRC >>> 31);
327             this.computedCombinedCRC ^= this.storedBlockCRC;
328 
329             throw new IOException("BZip2 CRC error");
330         }
331 
332         this.computedCombinedCRC = (this.computedCombinedCRC << 1)
333             | (this.computedCombinedCRC >>> 31);
334         this.computedCombinedCRC ^= this.computedBlockCRC;
335     }
336 
337     private boolean complete() throws IOException {
338         this.storedCombinedCRC = bsGetInt();
339         this.currentState = EOF;
340         this.data = null;
341 
342         if (this.storedCombinedCRC != this.computedCombinedCRC) {
343             throw new IOException("BZip2 CRC error");
344         }
345 
346         // Look for the next .bz2 stream if decompressing
347         // concatenated files.
348         return !decompressConcatenated || !init(false);
349     }
350 
351     @Override
352     public void close() throws IOException {
353         InputStream inShadow = this.in;
354         if (inShadow != null) {
355             try {
356                 if (inShadow != System.in) {
357                     inShadow.close();
358                 }
359             } finally {
360                 this.data = null;
361                 this.in = null;
362             }
363         }
364     }
365 
366     private int bsR(final int n) throws IOException {
367         int bsLiveShadow = this.bsLive;
368         int bsBuffShadow = this.bsBuff;
369 
370         if (bsLiveShadow < n) {
371             final InputStream inShadow = this.in;
372             do {
373                 int thech = inShadow.read();
374 
375                 if (thech < 0) {
376                     throw new IOException("unexpected end of stream");
377                 }
378 
379                 bsBuffShadow = (bsBuffShadow << 8) | thech;
380                 bsLiveShadow += 8;
381             } while (bsLiveShadow < n);
382 
383             this.bsBuff = bsBuffShadow;
384         }
385 
386         this.bsLive = bsLiveShadow - n;
387         return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
388     }
389 
390     private boolean bsGetBit() throws IOException {
391         int bsLiveShadow = this.bsLive;
392         int bsBuffShadow = this.bsBuff;
393 
394         if (bsLiveShadow < 1) {
395             int thech = this.in.read();
396 
397             if (thech < 0) {
398                 throw new IOException("unexpected end of stream");
399             }
400 
401             bsBuffShadow = (bsBuffShadow << 8) | thech;
402             bsLiveShadow += 8;
403             this.bsBuff = bsBuffShadow;
404         }
405 
406         this.bsLive = bsLiveShadow - 1;
407         return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
408     }
409 
410     private char bsGetUByte() throws IOException {
411         return (char) bsR(8);
412     }
413 
414     private int bsGetInt() throws IOException {
415         return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
416     }
417 
418     /**
419      * Called by createHuffmanDecodingTables() exclusively.
420      */
421     private static void hbCreateDecodeTables(final int[] limit,
422                                              final int[] base, final int[] perm, final char[] length,
423                                              final int minLen, final int maxLen, final int alphaSize) {
424         for (int i = minLen, pp = 0; i <= maxLen; i++) {
425             for (int j = 0; j < alphaSize; j++) {
426                 if (length[j] == i) {
427                     perm[pp++] = j;
428                 }
429             }
430         }
431 
432         for (int i = MAX_CODE_LEN; --i > 0;) {
433             base[i] = 0;
434             limit[i] = 0;
435         }
436 
437         for (int i = 0; i < alphaSize; i++) {
438             base[length[i] + 1]++;
439         }
440 
441         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
442             b += base[i];
443             base[i] = b;
444         }
445 
446         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
447             final int nb = base[i + 1];
448             vec += nb - b;
449             b = nb;
450             limit[i] = vec - 1;
451             vec <<= 1;
452         }
453 
454         for (int i = minLen + 1; i <= maxLen; i++) {
455             base[i] = ((limit[i - 1] + 1) << 1) - base[i];
456         }
457     }
458 
459     private void recvDecodingTables() throws IOException {
460         final Data dataShadow = this.data;
461         final boolean[] inUse = dataShadow.inUse;
462         final byte[] pos = dataShadow.recvDecodingTables_pos;
463         final byte[] selector = dataShadow.selector;
464         final byte[] selectorMtf = dataShadow.selectorMtf;
465 
466         int inUse16 = 0;
467 
468         /* Receive the mapping table */
469         for (int i = 0; i < 16; i++) {
470             if (bsGetBit()) {
471                 inUse16 |= 1 << i;
472             }
473         }
474 
475         for (int i = 256; --i >= 0;) {
476             inUse[i] = false;
477         }
478 
479         for (int i = 0; i < 16; i++) {
480             if ((inUse16 & (1 << i)) != 0) {
481                 final int i16 = i << 4;
482                 for (int j = 0; j < 16; j++) {
483                     if (bsGetBit()) {
484                         inUse[i16 + j] = true;
485                     }
486                 }
487             }
488         }
489 
490         makeMaps();
491         final int alphaSize = this.nInUse + 2;
492 
493         /* Now the selectors */
494         final int nGroups = bsR(3);
495         final int nSelectors = bsR(15);
496 
497         for (int i = 0; i < nSelectors; i++) {
498             int j = 0;
499             while (bsGetBit()) {
500                 j++;
501             }
502             selectorMtf[i] = (byte) j;
503         }
504 
505         /* Undo the MTF values for the selectors. */
506         for (int v = nGroups; --v >= 0;) {
507             pos[v] = (byte) v;
508         }
509 
510         for (int i = 0; i < nSelectors; i++) {
511             int v = selectorMtf[i] & 0xff;
512             final byte tmp = pos[v];
513             while (v > 0) {
514                 // nearly all times v is zero, 4 in most other cases
515                 pos[v] = pos[v - 1];
516                 v--;
517             }
518             pos[0] = tmp;
519             selector[i] = tmp;
520         }
521 
522         final char[][] len = dataShadow.temp_charArray2d;
523 
524         /* Now the coding tables */
525         for (int t = 0; t < nGroups; t++) {
526             int curr = bsR(5);
527             final char[] len_t = len[t];
528             for (int i = 0; i < alphaSize; i++) {
529                 while (bsGetBit()) {
530                     curr += bsGetBit() ? -1 : 1;
531                 }
532                 len_t[i] = (char) curr;
533             }
534         }
535 
536         // finally create the Huffman tables
537         createHuffmanDecodingTables(alphaSize, nGroups);
538     }
539 
540     /**
541      * Called by recvDecodingTables() exclusively.
542      */
543     private void createHuffmanDecodingTables(final int alphaSize,
544                                              final int nGroups) {
545         final Data dataShadow = this.data;
546         final char[][] len = dataShadow.temp_charArray2d;
547         final int[] minLens = dataShadow.minLens;
548         final int[][] limit = dataShadow.limit;
549         final int[][] base = dataShadow.base;
550         final int[][] perm = dataShadow.perm;
551 
552         for (int t = 0; t < nGroups; t++) {
553             int minLen = 32;
554             int maxLen = 0;
555             final char[] len_t = len[t];
556             for (int i = alphaSize; --i >= 0;) {
557                 final char lent = len_t[i];
558                 if (lent > maxLen) {
559                     maxLen = lent;
560                 }
561                 if (lent < minLen) {
562                     minLen = lent;
563                 }
564             }
565             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
566                                  maxLen, alphaSize);
567             minLens[t] = minLen;
568         }
569     }
570 
571     private void getAndMoveToFrontDecode() throws IOException {
572         this.origPtr = bsR(24);
573         recvDecodingTables();
574 
575         final InputStream inShadow = this.in;
576         final Data dataShadow = this.data;
577         final byte[] ll8 = dataShadow.ll8;
578         final int[] unzftab = dataShadow.unzftab;
579         final byte[] selector = dataShadow.selector;
580         final byte[] seqToUnseq = dataShadow.seqToUnseq;
581         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
582         final int[] minLens = dataShadow.minLens;
583         final int[][] limit = dataShadow.limit;
584         final int[][] base = dataShadow.base;
585         final int[][] perm = dataShadow.perm;
586         final int limitLast = this.blockSize100k * 100000;
587 
588         /*
589          * Setting up the unzftab entries here is not strictly necessary, but it
590          * does save having to do it later in a separate pass, and so saves a
591          * block's worth of cache misses.
592          */
593         for (int i = 256; --i >= 0;) {
594             yy[i] = (char) i;
595             unzftab[i] = 0;
596         }
597 
598         int groupNo = 0;
599         int groupPos = G_SIZE - 1;
600         final int eob = this.nInUse + 1;
601         int nextSym = getAndMoveToFrontDecode0(0);
602         int bsBuffShadow = this.bsBuff;
603         int bsLiveShadow = this.bsLive;
604         int lastShadow = -1;
605         int zt = selector[groupNo] & 0xff;
606         int[] base_zt = base[zt];
607         int[] limit_zt = limit[zt];
608         int[] perm_zt = perm[zt];
609         int minLens_zt = minLens[zt];
610 
611         while (nextSym != eob) {
612             if ((nextSym == RUNA) || (nextSym == RUNB)) {
613                 int s = -1;
614 
615                 for (int n = 1; true; n <<= 1) {
616                     if (nextSym == RUNA) {
617                         s += n;
618                     } else if (nextSym == RUNB) {
619                         s += n << 1;
620                     } else {
621                         break;
622                     }
623 
624                     if (groupPos == 0) {
625                         groupPos = G_SIZE - 1;
626                         zt = selector[++groupNo] & 0xff;
627                         base_zt = base[zt];
628                         limit_zt = limit[zt];
629                         perm_zt = perm[zt];
630                         minLens_zt = minLens[zt];
631                     } else {
632                         groupPos--;
633                     }
634 
635                     int zn = minLens_zt;
636 
637                     // Inlined:
638                     // int zvec = bsR(zn);
639                     while (bsLiveShadow < zn) {
640                         final int thech = inShadow.read();
641                         if (thech >= 0) {
642                             bsBuffShadow = (bsBuffShadow << 8) | thech;
643                             bsLiveShadow += 8;
644                             continue;
645                         } else {
646                             throw new IOException("unexpected end of stream");
647                         }
648                     }
649                     int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
650                         & ((1 << zn) - 1);
651                     bsLiveShadow -= zn;
652 
653                     while (zvec > limit_zt[zn]) {
654                         zn++;
655                         while (bsLiveShadow < 1) {
656                             final int thech = inShadow.read();
657                             if (thech >= 0) {
658                                 bsBuffShadow = (bsBuffShadow << 8) | thech;
659                                 bsLiveShadow += 8;
660                                 continue;
661                             } else {
662                                 throw new IOException(
663                                                       "unexpected end of stream");
664                             }
665                         }
666                         bsLiveShadow--;
667                         zvec = (zvec << 1)
668                             | ((bsBuffShadow >> bsLiveShadow) & 1);
669                     }
670                     nextSym = perm_zt[zvec - base_zt[zn]];
671                 }
672 
673                 final byte ch = seqToUnseq[yy[0]];
674                 unzftab[ch & 0xff] += s + 1;
675 
676                 while (s-- >= 0) {
677                     ll8[++lastShadow] = ch;
678                 }
679 
680                 if (lastShadow >= limitLast) {
681                     throw new IOException("block overrun");
682                 }
683             } else {
684                 if (++lastShadow >= limitLast) {
685                     throw new IOException("block overrun");
686                 }
687 
688                 final char tmp = yy[nextSym - 1];
689                 unzftab[seqToUnseq[tmp] & 0xff]++;
690                 ll8[lastShadow] = seqToUnseq[tmp];
691 
692                 /*
693                  * This loop is hammered during decompression, hence avoid
694                  * native method call overhead of System.arraycopy for very
695                  * small ranges to copy.
696                  */
697                 if (nextSym <= 16) {
698                     for (int j = nextSym - 1; j > 0;) {
699                         yy[j] = yy[--j];
700                     }
701                 } else {
702                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
703                 }
704 
705                 yy[0] = tmp;
706 
707                 if (groupPos == 0) {
708                     groupPos = G_SIZE - 1;
709                     zt = selector[++groupNo] & 0xff;
710                     base_zt = base[zt];
711                     limit_zt = limit[zt];
712                     perm_zt = perm[zt];
713                     minLens_zt = minLens[zt];
714                 } else {
715                     groupPos--;
716                 }
717 
718                 int zn = minLens_zt;
719 
720                 // Inlined:
721                 // int zvec = bsR(zn);
722                 while (bsLiveShadow < zn) {
723                     final int thech = inShadow.read();
724                     if (thech >= 0) {
725                         bsBuffShadow = (bsBuffShadow << 8) | thech;
726                         bsLiveShadow += 8;
727                         continue;
728                     } else {
729                         throw new IOException("unexpected end of stream");
730                     }
731                 }
732                 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
733                     & ((1 << zn) - 1);
734                 bsLiveShadow -= zn;
735 
736                 while (zvec > limit_zt[zn]) {
737                     zn++;
738                     while (bsLiveShadow < 1) {
739                         final int thech = inShadow.read();
740                         if (thech >= 0) {
741                             bsBuffShadow = (bsBuffShadow << 8) | thech;
742                             bsLiveShadow += 8;
743                             continue;
744                         } else {
745                             throw new IOException("unexpected end of stream");
746                         }
747                     }
748                     bsLiveShadow--;
749                     zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
750                 }
751                 nextSym = perm_zt[zvec - base_zt[zn]];
752             }
753         }
754 
755         this.last = lastShadow;
756         this.bsLive = bsLiveShadow;
757         this.bsBuff = bsBuffShadow;
758     }
759 
760     private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
761         final InputStream inShadow = this.in;
762         final Data dataShadow = this.data;
763         final int zt = dataShadow.selector[groupNo] & 0xff;
764         final int[] limit_zt = dataShadow.limit[zt];
765         int zn = dataShadow.minLens[zt];
766         int zvec = bsR(zn);
767         int bsLiveShadow = this.bsLive;
768         int bsBuffShadow = this.bsBuff;
769 
770         while (zvec > limit_zt[zn]) {
771             zn++;
772             while (bsLiveShadow < 1) {
773                 final int thech = inShadow.read();
774 
775                 if (thech >= 0) {
776                     bsBuffShadow = (bsBuffShadow << 8) | thech;
777                     bsLiveShadow += 8;
778                     continue;
779                 } else {
780                     throw new IOException("unexpected end of stream");
781                 }
782             }
783             bsLiveShadow--;
784             zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
785         }
786 
787         this.bsLive = bsLiveShadow;
788         this.bsBuff = bsBuffShadow;
789 
790         return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
791     }
792 
793     private int setupBlock() throws IOException {
794         if (currentState == EOF || this.data == null) {
795             return -1;
796         }
797 
798         final int[] cftab = this.data.cftab;
799         final int[] tt = this.data.initTT(this.last + 1);
800         final byte[] ll8 = this.data.ll8;
801         cftab[0] = 0;
802         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
803 
804         for (int i = 1, c = cftab[0]; i <= 256; i++) {
805             c += cftab[i];
806             cftab[i] = c;
807         }
808 
809         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
810             tt[cftab[ll8[i] & 0xff]++] = i;
811         }
812 
813         if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
814             throw new IOException("stream corrupted");
815         }
816 
817         this.su_tPos = tt[this.origPtr];
818         this.su_count = 0;
819         this.su_i2 = 0;
820         this.su_ch2 = 256; /* not a char and not EOF */
821 
822         if (this.blockRandomised) {
823             this.su_rNToGo = 0;
824             this.su_rTPos = 0;
825             return setupRandPartA();
826         }
827         return setupNoRandPartA();
828     }
829 
830     private int setupRandPartA() throws IOException {
831         if (this.su_i2 <= this.last) {
832             this.su_chPrev = this.su_ch2;
833             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
834             this.su_tPos = this.data.tt[this.su_tPos];
835             if (this.su_rNToGo == 0) {
836                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
837                 if (++this.su_rTPos == 512) {
838                     this.su_rTPos = 0;
839                 }
840             } else {
841                 this.su_rNToGo--;
842             }
843             this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
844             this.su_i2++;
845             this.currentState = RAND_PART_B_STATE;
846             this.crc.updateCRC(su_ch2Shadow);
847             return su_ch2Shadow;
848         } else {
849             endBlock();
850             initBlock();
851             return setupBlock();
852         }
853     }
854 
855     private int setupNoRandPartA() throws IOException {
856         if (this.su_i2 <= this.last) {
857             this.su_chPrev = this.su_ch2;
858             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
859             this.su_ch2 = su_ch2Shadow;
860             this.su_tPos = this.data.tt[this.su_tPos];
861             this.su_i2++;
862             this.currentState = NO_RAND_PART_B_STATE;
863             this.crc.updateCRC(su_ch2Shadow);
864             return su_ch2Shadow;
865         } else {
866             this.currentState = NO_RAND_PART_A_STATE;
867             endBlock();
868             initBlock();
869             return setupBlock();
870         }
871     }
872 
873     private int setupRandPartB() throws IOException {
874         if (this.su_ch2 != this.su_chPrev) {
875             this.currentState = RAND_PART_A_STATE;
876             this.su_count = 1;
877             return setupRandPartA();
878         } else if (++this.su_count >= 4) {
879             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
880             this.su_tPos = this.data.tt[this.su_tPos];
881             if (this.su_rNToGo == 0) {
882                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
883                 if (++this.su_rTPos == 512) {
884                     this.su_rTPos = 0;
885                 }
886             } else {
887                 this.su_rNToGo--;
888             }
889             this.su_j2 = 0;
890             this.currentState = RAND_PART_C_STATE;
891             if (this.su_rNToGo == 1) {
892                 this.su_z ^= 1;
893             }
894             return setupRandPartC();
895         } else {
896             this.currentState = RAND_PART_A_STATE;
897             return setupRandPartA();
898         }
899     }
900 
901     private int setupRandPartC() throws IOException {
902         if (this.su_j2 < this.su_z) {
903             this.crc.updateCRC(this.su_ch2);
904             this.su_j2++;
905             return this.su_ch2;
906         } else {
907             this.currentState = RAND_PART_A_STATE;
908             this.su_i2++;
909             this.su_count = 0;
910             return setupRandPartA();
911         }
912     }
913 
914     private int setupNoRandPartB() throws IOException {
915         if (this.su_ch2 != this.su_chPrev) {
916             this.su_count = 1;
917             return setupNoRandPartA();
918         } else if (++this.su_count >= 4) {
919             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
920             this.su_tPos = this.data.tt[this.su_tPos];
921             this.su_j2 = 0;
922             return setupNoRandPartC();
923         } else {
924             return setupNoRandPartA();
925         }
926     }
927 
928     private int setupNoRandPartC() throws IOException {
929         if (this.su_j2 < this.su_z) {
930             int su_ch2Shadow = this.su_ch2;
931             this.crc.updateCRC(su_ch2Shadow);
932             this.su_j2++;
933             this.currentState = NO_RAND_PART_C_STATE;
934             return su_ch2Shadow;
935         } else {
936             this.su_i2++;
937             this.su_count = 0;
938             return setupNoRandPartA();
939         }
940     }
941 
942     private static final class Data extends Object {
943 
944         // (with blockSize 900k)
945         final boolean[] inUse = new boolean[256]; // 256 byte
946 
947         final byte[] seqToUnseq = new byte[256]; // 256 byte
948         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
949         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
950 
951         /**
952          * Freq table collected to save a pass over the data during
953          * decompression.
954          */
955         final int[] unzftab = new int[256]; // 1024 byte
956 
957         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
958         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
959         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
960         final int[] minLens = new int[N_GROUPS]; // 24 byte
961 
962         final int[] cftab = new int[257]; // 1028 byte
963         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
964         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
965         // byte
966         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
967         // ---------------
968         // 60798 byte
969 
970         int[] tt; // 3600000 byte
971         byte[] ll8; // 900000 byte
972 
973         // ---------------
974         // 4560782 byte
975         // ===============
976 
977         Data(int blockSize100k) {
978             this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
979         }
980 
981         /**
982          * Initializes the {@link #tt} array.
983          * 
984          * This method is called when the required length of the array is known.
985          * I don't initialize it at construction time to avoid unneccessary
986          * memory allocation when compressing small files.
987          */
988         int[] initTT(int length) {
989             int[] ttShadow = this.tt;
990 
991             // tt.length should always be >= length, but theoretically
992             // it can happen, if the compressor mixed small and large
993             // blocks. Normally only the last block will be smaller
994             // than others.
995             if ((ttShadow == null) || (ttShadow.length < length)) {
996                 this.tt = ttShadow = new int[length];
997             }
998 
999             return ttShadow;
1000         }
1001 
1002     }
1003 
1004     /**
1005      * Checks if the signature matches what is expected for a bzip2 file.
1006      * 
1007      * @param signature
1008      *            the bytes to check
1009      * @param length
1010      *            the number of bytes to check
1011      * @return true, if this stream is a bzip2 compressed stream, false otherwise
1012      * 
1013      * @since 1.1
1014      */
1015     public static boolean matches(byte[] signature, int length) {
1016 
1017         if (length < 3) {
1018             return false;
1019         }
1020 
1021         if (signature[0] != 'B') {
1022             return false;
1023         }
1024 
1025         if (signature[1] != 'Z') {
1026             return false;
1027         }
1028 
1029         if (signature[2] != 'h') {
1030             return false;
1031         }
1032 
1033         return true;
1034     }
1035 }