View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  /*
21   * This package is based on the work done by Keiron Liddle, Aftex Software
22   * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
23   * great code.
24   */
25  package org.apache.commons.compress.compressors.bzip2;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  
30  import org.apache.commons.compress.compressors.CompressorInputStream;
31  
32  /**
33   * An input stream that decompresses from the BZip2 format to be read as any other stream.
34   * 
35   * @NotThreadSafe
36   */
37  public class BZip2CompressorInputStream extends CompressorInputStream implements
38                                                                            BZip2Constants {
39  
40      /**
41       * Index of the last char in the block, so the block size == last + 1.
42       */
43      private int last;
44  
45      /**
46       * Index in zptr[] of original string after sorting.
47       */
48      private int origPtr;
49  
50      /**
51       * always: in the range 0 .. 9. The current block size is 100000 * this
52       * number.
53       */
54      private int blockSize100k;
55  
56      private boolean blockRandomised;
57  
58      private int bsBuff;
59      private int bsLive;
60      private final CRC crc = new CRC();
61  
62      private int nInUse;
63  
64      private InputStream in;
65      private final boolean decompressConcatenated;
66  
67      private static final int EOF = 0;
68      private static final int START_BLOCK_STATE = 1;
69      private static final int RAND_PART_A_STATE = 2;
70      private static final int RAND_PART_B_STATE = 3;
71      private static final int RAND_PART_C_STATE = 4;
72      private static final int NO_RAND_PART_A_STATE = 5;
73      private static final int NO_RAND_PART_B_STATE = 6;
74      private static final int NO_RAND_PART_C_STATE = 7;
75  
76      private int currentState = START_BLOCK_STATE;
77  
78      private int storedBlockCRC, storedCombinedCRC;
79      private int computedBlockCRC, computedCombinedCRC;
80  
81      // Variables used by setup* methods exclusively
82  
83      private int su_count;
84      private int su_ch2;
85      private int su_chPrev;
86      private int su_i2;
87      private int su_j2;
88      private int su_rNToGo;
89      private int su_rTPos;
90      private int su_tPos;
91      private char su_z;
92  
93      /**
94       * All memory intensive stuff. This field is initialized by initBlock().
95       */
96      private BZip2CompressorInputStream.Data data;
97  
98      /**
99       * Constructs a new BZip2CompressorInputStream which decompresses bytes
100      * read from the specified stream. This doesn't suppprt decompressing
101      * concatenated .bz2 files.
102      * 
103      * @param in the InputStream from which this object should be created
104      * @throws IOException
105      *             if the stream content is malformed or an I/O error occurs.
106      * @throws NullPointerException
107      *             if {@code in == null}
108      */
109     public BZip2CompressorInputStream(final InputStream in) throws IOException {
110         this(in, false);
111     }
112 
113     /**
114      * Constructs a new BZip2CompressorInputStream which decompresses bytes
115      * read from the specified stream.
116      *
117      * @param in the InputStream from which this object should be created
118      * @param decompressConcatenated
119      *                     if true, decompress until the end of the input;
120      *                     if false, stop after the first .bz2 stream and
121      *                     leave the input position to point to the next
122      *                     byte after the .bz2 stream
123      *
124      * @throws IOException
125      *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
126      */
127     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
128         this.in = in;
129         this.decompressConcatenated = decompressConcatenated;
130 
131         init(true);
132         initBlock();
133     }
134 
135     @Override
136     public int read() throws IOException {
137         if (this.in != null) {
138             final int r = read0();
139             count(r < 0 ? -1 : 1);
140             return r;
141         }
142         throw new IOException("stream closed");
143     }
144 
145     /*
146      * (non-Javadoc)
147      * 
148      * @see java.io.InputStream#read(byte[], int, int)
149      */
150     @Override
151     public int read(final byte[] dest, final int offs, final int len)
152         throws IOException {
153         if (offs < 0) {
154             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
155         }
156         if (len < 0) {
157             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
158         }
159         if (offs + len > dest.length) {
160             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
161                                                 + len + ") > dest.length(" + dest.length + ").");
162         }
163         if (this.in == null) {
164             throw new IOException("stream closed");
165         }
166         if (len == 0) {
167             return 0;
168         }
169 
170         final int hi = offs + len;
171         int destOffs = offs;
172         int b;
173         while (destOffs < hi && ((b = read0()) >= 0)) {
174             dest[destOffs++] = (byte) b;
175             count(1);
176         }
177 
178         final int c = (destOffs == offs) ? -1 : (destOffs - offs);
179         return c;
180     }
181 
182     private void makeMaps() {
183         final boolean[] inUse = this.data.inUse;
184         final byte[] seqToUnseq = this.data.seqToUnseq;
185 
186         int nInUseShadow = 0;
187 
188         for (int i = 0; i < 256; i++) {
189             if (inUse[i]) {
190                 seqToUnseq[nInUseShadow++] = (byte) i;
191             }
192         }
193 
194         this.nInUse = nInUseShadow;
195     }
196 
197     private int read0() throws IOException {
198         switch (currentState) {
199         case EOF:
200             return -1;
201 
202         case START_BLOCK_STATE:
203             return setupBlock();
204 
205         case RAND_PART_A_STATE:
206             throw new IllegalStateException();
207 
208         case RAND_PART_B_STATE:
209             return setupRandPartB();
210 
211         case RAND_PART_C_STATE:
212             return setupRandPartC();
213 
214         case NO_RAND_PART_A_STATE:
215             throw new IllegalStateException();
216 
217         case NO_RAND_PART_B_STATE:
218             return setupNoRandPartB();
219 
220         case NO_RAND_PART_C_STATE:
221             return setupNoRandPartC();
222 
223         default:
224             throw new IllegalStateException();
225         }
226     }
227 
228     private boolean init(final boolean isFirstStream) throws IOException {
229         if (null == in) {
230             throw new IOException("No InputStream");
231         }
232 
233         final int magic0 = this.in.read();
234         if (magic0 == -1 && !isFirstStream) {
235             return false;
236         }
237         final int magic1 = this.in.read();
238         final int magic2 = this.in.read();
239 
240         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
241             throw new IOException(isFirstStream
242                     ? "Stream is not in the BZip2 format"
243                     : "Garbage after a valid BZip2 stream");
244         }
245 
246         final int blockSize = this.in.read();
247         if ((blockSize < '1') || (blockSize > '9')) {
248             throw new IOException("BZip2 block size is invalid");
249         }
250 
251         this.blockSize100k = blockSize - '0';
252 
253         this.bsLive = 0;
254         this.computedCombinedCRC = 0;
255 
256         return true;
257     }
258 
259     private void initBlock() throws IOException {
260         char magic0;
261         char magic1;
262         char magic2;
263         char magic3;
264         char magic4;
265         char magic5;
266 
267         while (true) {
268             // Get the block magic bytes.
269             magic0 = bsGetUByte();
270             magic1 = bsGetUByte();
271             magic2 = bsGetUByte();
272             magic3 = bsGetUByte();
273             magic4 = bsGetUByte();
274             magic5 = bsGetUByte();
275 
276             // If isn't end of stream magic, break out of the loop.
277             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
278                     || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
279                 break;
280             }
281 
282             // End of stream was reached. Check the combined CRC and
283             // advance to the next .bz2 stream if decoding concatenated
284             // streams.
285             if (complete()) {
286                 return;
287             }
288         }
289 
290         if (magic0 != 0x31 || // '1'
291             magic1 != 0x41 || // ')'
292             magic2 != 0x59 || // 'Y'
293             magic3 != 0x26 || // '&'
294             magic4 != 0x53 || // 'S'
295             magic5 != 0x59 // 'Y'
296             ) {
297             this.currentState = EOF;
298             throw new IOException("bad block header");
299         }
300         this.storedBlockCRC = bsGetInt();
301         this.blockRandomised = bsR(1) == 1;
302 
303         /**
304          * Allocate data here instead in constructor, so we do not allocate
305          * it if the input file is empty.
306          */
307         if (this.data == null) {
308             this.data = new Data(this.blockSize100k);
309         }
310 
311         // currBlockNo++;
312         getAndMoveToFrontDecode();
313 
314         this.crc.initialiseCRC();
315         this.currentState = START_BLOCK_STATE;
316     }
317 
318     private void endBlock() throws IOException {
319         this.computedBlockCRC = this.crc.getFinalCRC();
320 
321         // A bad CRC is considered a fatal error.
322         if (this.storedBlockCRC != this.computedBlockCRC) {
323             // make next blocks readable without error
324             // (repair feature, not yet documented, not tested)
325             this.computedCombinedCRC = (this.storedCombinedCRC << 1)
326                 | (this.storedCombinedCRC >>> 31);
327             this.computedCombinedCRC ^= this.storedBlockCRC;
328 
329             throw new IOException("BZip2 CRC error");
330         }
331 
332         this.computedCombinedCRC = (this.computedCombinedCRC << 1)
333             | (this.computedCombinedCRC >>> 31);
334         this.computedCombinedCRC ^= this.computedBlockCRC;
335     }
336 
337     private boolean complete() throws IOException {
338         this.storedCombinedCRC = bsGetInt();
339         this.currentState = EOF;
340         this.data = null;
341 
342         if (this.storedCombinedCRC != this.computedCombinedCRC) {
343             throw new IOException("BZip2 CRC error");
344         }
345 
346         // Look for the next .bz2 stream if decompressing
347         // concatenated files.
348         return !decompressConcatenated || !init(false);
349     }
350 
351     @Override
352     public void close() throws IOException {
353         final InputStream inShadow = this.in;
354         if (inShadow != null) {
355             try {
356                 if (inShadow != System.in) {
357                     inShadow.close();
358                 }
359             } finally {
360                 this.data = null;
361                 this.in = null;
362             }
363         }
364     }
365 
366     private int bsR(final int n) throws IOException {
367         int bsLiveShadow = this.bsLive;
368         int bsBuffShadow = this.bsBuff;
369 
370         if (bsLiveShadow < n) {
371             final InputStream inShadow = this.in;
372             do {
373                 final int thech = inShadow.read();
374 
375                 if (thech < 0) {
376                     throw new IOException("unexpected end of stream");
377                 }
378 
379                 bsBuffShadow = (bsBuffShadow << 8) | thech;
380                 bsLiveShadow += 8;
381             } while (bsLiveShadow < n);
382 
383             this.bsBuff = bsBuffShadow;
384         }
385 
386         this.bsLive = bsLiveShadow - n;
387         return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
388     }
389 
390     private boolean bsGetBit() throws IOException {
391         return bsR(1) != 0;
392     }
393 
394     private char bsGetUByte() throws IOException {
395         return (char) bsR(8);
396     }
397 
398     private int bsGetInt() throws IOException {
399         return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
400     }
401 
402     /**
403      * Called by createHuffmanDecodingTables() exclusively.
404      */
405     private static void hbCreateDecodeTables(final int[] limit,
406                                              final int[] base, final int[] perm, final char[] length,
407                                              final int minLen, final int maxLen, final int alphaSize) {
408         for (int i = minLen, pp = 0; i <= maxLen; i++) {
409             for (int j = 0; j < alphaSize; j++) {
410                 if (length[j] == i) {
411                     perm[pp++] = j;
412                 }
413             }
414         }
415 
416         for (int i = MAX_CODE_LEN; --i > 0;) {
417             base[i] = 0;
418             limit[i] = 0;
419         }
420 
421         for (int i = 0; i < alphaSize; i++) {
422             base[length[i] + 1]++;
423         }
424 
425         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
426             b += base[i];
427             base[i] = b;
428         }
429 
430         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
431             final int nb = base[i + 1];
432             vec += nb - b;
433             b = nb;
434             limit[i] = vec - 1;
435             vec <<= 1;
436         }
437 
438         for (int i = minLen + 1; i <= maxLen; i++) {
439             base[i] = ((limit[i - 1] + 1) << 1) - base[i];
440         }
441     }
442 
443     private void recvDecodingTables() throws IOException {
444         final Data dataShadow = this.data;
445         final boolean[] inUse = dataShadow.inUse;
446         final byte[] pos = dataShadow.recvDecodingTables_pos;
447         final byte[] selector = dataShadow.selector;
448         final byte[] selectorMtf = dataShadow.selectorMtf;
449 
450         int inUse16 = 0;
451 
452         /* Receive the mapping table */
453         for (int i = 0; i < 16; i++) {
454             if (bsGetBit()) {
455                 inUse16 |= 1 << i;
456             }
457         }
458 
459         for (int i = 256; --i >= 0;) {
460             inUse[i] = false;
461         }
462 
463         for (int i = 0; i < 16; i++) {
464             if ((inUse16 & (1 << i)) != 0) {
465                 final int i16 = i << 4;
466                 for (int j = 0; j < 16; j++) {
467                     if (bsGetBit()) {
468                         inUse[i16 + j] = true;
469                     }
470                 }
471             }
472         }
473 
474         makeMaps();
475         final int alphaSize = this.nInUse + 2;
476 
477         /* Now the selectors */
478         final int nGroups = bsR(3);
479         final int nSelectors = bsR(15);
480 
481         for (int i = 0; i < nSelectors; i++) {
482             int j = 0;
483             while (bsGetBit()) {
484                 j++;
485             }
486             selectorMtf[i] = (byte) j;
487         }
488 
489         /* Undo the MTF values for the selectors. */
490         for (int v = nGroups; --v >= 0;) {
491             pos[v] = (byte) v;
492         }
493 
494         for (int i = 0; i < nSelectors; i++) {
495             int v = selectorMtf[i] & 0xff;
496             final byte tmp = pos[v];
497             while (v > 0) {
498                 // nearly all times v is zero, 4 in most other cases
499                 pos[v] = pos[v - 1];
500                 v--;
501             }
502             pos[0] = tmp;
503             selector[i] = tmp;
504         }
505 
506         final char[][] len = dataShadow.temp_charArray2d;
507 
508         /* Now the coding tables */
509         for (int t = 0; t < nGroups; t++) {
510             int curr = bsR(5);
511             final char[] len_t = len[t];
512             for (int i = 0; i < alphaSize; i++) {
513                 while (bsGetBit()) {
514                     curr += bsGetBit() ? -1 : 1;
515                 }
516                 len_t[i] = (char) curr;
517             }
518         }
519 
520         // finally create the Huffman tables
521         createHuffmanDecodingTables(alphaSize, nGroups);
522     }
523 
524     /**
525      * Called by recvDecodingTables() exclusively.
526      */
527     private void createHuffmanDecodingTables(final int alphaSize,
528                                              final int nGroups) {
529         final Data dataShadow = this.data;
530         final char[][] len = dataShadow.temp_charArray2d;
531         final int[] minLens = dataShadow.minLens;
532         final int[][] limit = dataShadow.limit;
533         final int[][] base = dataShadow.base;
534         final int[][] perm = dataShadow.perm;
535 
536         for (int t = 0; t < nGroups; t++) {
537             int minLen = 32;
538             int maxLen = 0;
539             final char[] len_t = len[t];
540             for (int i = alphaSize; --i >= 0;) {
541                 final char lent = len_t[i];
542                 if (lent > maxLen) {
543                     maxLen = lent;
544                 }
545                 if (lent < minLen) {
546                     minLen = lent;
547                 }
548             }
549             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
550                                  maxLen, alphaSize);
551             minLens[t] = minLen;
552         }
553     }
554 
555     private void getAndMoveToFrontDecode() throws IOException {
556         this.origPtr = bsR(24);
557         recvDecodingTables();
558 
559         final InputStream inShadow = this.in;
560         final Data dataShadow = this.data;
561         final byte[] ll8 = dataShadow.ll8;
562         final int[] unzftab = dataShadow.unzftab;
563         final byte[] selector = dataShadow.selector;
564         final byte[] seqToUnseq = dataShadow.seqToUnseq;
565         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
566         final int[] minLens = dataShadow.minLens;
567         final int[][] limit = dataShadow.limit;
568         final int[][] base = dataShadow.base;
569         final int[][] perm = dataShadow.perm;
570         final int limitLast = this.blockSize100k * 100000;
571 
572         /*
573          * Setting up the unzftab entries here is not strictly necessary, but it
574          * does save having to do it later in a separate pass, and so saves a
575          * block's worth of cache misses.
576          */
577         for (int i = 256; --i >= 0;) {
578             yy[i] = (char) i;
579             unzftab[i] = 0;
580         }
581 
582         int groupNo = 0;
583         int groupPos = G_SIZE - 1;
584         final int eob = this.nInUse + 1;
585         int nextSym = getAndMoveToFrontDecode0(0);
586         int bsBuffShadow = this.bsBuff;
587         int bsLiveShadow = this.bsLive;
588         int lastShadow = -1;
589         int zt = selector[groupNo] & 0xff;
590         int[] base_zt = base[zt];
591         int[] limit_zt = limit[zt];
592         int[] perm_zt = perm[zt];
593         int minLens_zt = minLens[zt];
594 
595         while (nextSym != eob) {
596             if ((nextSym == RUNA) || (nextSym == RUNB)) {
597                 int s = -1;
598 
599                 for (int n = 1; true; n <<= 1) {
600                     if (nextSym == RUNA) {
601                         s += n;
602                     } else if (nextSym == RUNB) {
603                         s += n << 1;
604                     } else {
605                         break;
606                     }
607 
608                     if (groupPos == 0) {
609                         groupPos = G_SIZE - 1;
610                         zt = selector[++groupNo] & 0xff;
611                         base_zt = base[zt];
612                         limit_zt = limit[zt];
613                         perm_zt = perm[zt];
614                         minLens_zt = minLens[zt];
615                     } else {
616                         groupPos--;
617                     }
618 
619                     int zn = minLens_zt;
620 
621                     // Inlined:
622                     // int zvec = bsR(zn);
623                     while (bsLiveShadow < zn) {
624                         final int thech = inShadow.read();
625                         if (thech >= 0) {
626                             bsBuffShadow = (bsBuffShadow << 8) | thech;
627                             bsLiveShadow += 8;
628                             continue;
629                         }
630                         throw new IOException("unexpected end of stream");
631                     }
632                     int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
633                         & ((1 << zn) - 1);
634                     bsLiveShadow -= zn;
635 
636                     while (zvec > limit_zt[zn]) {
637                         zn++;
638                         while (bsLiveShadow < 1) {
639                             final int thech = inShadow.read();
640                             if (thech >= 0) {
641                                 bsBuffShadow = (bsBuffShadow << 8) | thech;
642                                 bsLiveShadow += 8;
643                                 continue;
644                             }
645                             throw new IOException(
646                                                   "unexpected end of stream");
647                         }
648                         bsLiveShadow--;
649                         zvec = (zvec << 1)
650                             | ((bsBuffShadow >> bsLiveShadow) & 1);
651                     }
652                     nextSym = perm_zt[zvec - base_zt[zn]];
653                 }
654 
655                 final byte ch = seqToUnseq[yy[0]];
656                 unzftab[ch & 0xff] += s + 1;
657 
658                 while (s-- >= 0) {
659                     ll8[++lastShadow] = ch;
660                 }
661 
662                 if (lastShadow >= limitLast) {
663                     throw new IOException("block overrun");
664                 }
665             } else {
666                 if (++lastShadow >= limitLast) {
667                     throw new IOException("block overrun");
668                 }
669 
670                 final char tmp = yy[nextSym - 1];
671                 unzftab[seqToUnseq[tmp] & 0xff]++;
672                 ll8[lastShadow] = seqToUnseq[tmp];
673 
674                 /*
675                  * This loop is hammered during decompression, hence avoid
676                  * native method call overhead of System.arraycopy for very
677                  * small ranges to copy.
678                  */
679                 if (nextSym <= 16) {
680                     for (int j = nextSym - 1; j > 0;) {
681                         yy[j] = yy[--j];
682                     }
683                 } else {
684                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
685                 }
686 
687                 yy[0] = tmp;
688 
689                 if (groupPos == 0) {
690                     groupPos = G_SIZE - 1;
691                     zt = selector[++groupNo] & 0xff;
692                     base_zt = base[zt];
693                     limit_zt = limit[zt];
694                     perm_zt = perm[zt];
695                     minLens_zt = minLens[zt];
696                 } else {
697                     groupPos--;
698                 }
699 
700                 int zn = minLens_zt;
701 
702                 // Inlined:
703                 // int zvec = bsR(zn);
704                 while (bsLiveShadow < zn) {
705                     final int thech = inShadow.read();
706                     if (thech >= 0) {
707                         bsBuffShadow = (bsBuffShadow << 8) | thech;
708                         bsLiveShadow += 8;
709                         continue;
710                     }
711                     throw new IOException("unexpected end of stream");
712                 }
713                 int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
714                     & ((1 << zn) - 1);
715                 bsLiveShadow -= zn;
716 
717                 while (zvec > limit_zt[zn]) {
718                     zn++;
719                     while (bsLiveShadow < 1) {
720                         final int thech = inShadow.read();
721                         if (thech >= 0) {
722                             bsBuffShadow = (bsBuffShadow << 8) | thech;
723                             bsLiveShadow += 8;
724                             continue;
725                         }
726                         throw new IOException("unexpected end of stream");
727                     }
728                     bsLiveShadow--;
729                     zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
730                 }
731                 nextSym = perm_zt[zvec - base_zt[zn]];
732             }
733         }
734 
735         this.last = lastShadow;
736         this.bsLive = bsLiveShadow;
737         this.bsBuff = bsBuffShadow;
738     }
739 
740     private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
741         final InputStream inShadow = this.in;
742         final Data dataShadow = this.data;
743         final int zt = dataShadow.selector[groupNo] & 0xff;
744         final int[] limit_zt = dataShadow.limit[zt];
745         int zn = dataShadow.minLens[zt];
746         int zvec = bsR(zn);
747         int bsLiveShadow = this.bsLive;
748         int bsBuffShadow = this.bsBuff;
749 
750         while (zvec > limit_zt[zn]) {
751             zn++;
752             while (bsLiveShadow < 1) {
753                 final int thech = inShadow.read();
754 
755                 if (thech >= 0) {
756                     bsBuffShadow = (bsBuffShadow << 8) | thech;
757                     bsLiveShadow += 8;
758                     continue;
759                 }
760                 throw new IOException("unexpected end of stream");
761             }
762             bsLiveShadow--;
763             zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
764         }
765 
766         this.bsLive = bsLiveShadow;
767         this.bsBuff = bsBuffShadow;
768 
769         return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
770     }
771 
772     private int setupBlock() throws IOException {
773         if (currentState == EOF || this.data == null) {
774             return -1;
775         }
776 
777         final int[] cftab = this.data.cftab;
778         final int[] tt = this.data.initTT(this.last + 1);
779         final byte[] ll8 = this.data.ll8;
780         cftab[0] = 0;
781         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
782 
783         for (int i = 1, c = cftab[0]; i <= 256; i++) {
784             c += cftab[i];
785             cftab[i] = c;
786         }
787 
788         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
789             tt[cftab[ll8[i] & 0xff]++] = i;
790         }
791 
792         if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
793             throw new IOException("stream corrupted");
794         }
795 
796         this.su_tPos = tt[this.origPtr];
797         this.su_count = 0;
798         this.su_i2 = 0;
799         this.su_ch2 = 256; /* not a char and not EOF */
800 
801         if (this.blockRandomised) {
802             this.su_rNToGo = 0;
803             this.su_rTPos = 0;
804             return setupRandPartA();
805         }
806         return setupNoRandPartA();
807     }
808 
809     private int setupRandPartA() throws IOException {
810         if (this.su_i2 <= this.last) {
811             this.su_chPrev = this.su_ch2;
812             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
813             this.su_tPos = this.data.tt[this.su_tPos];
814             if (this.su_rNToGo == 0) {
815                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
816                 if (++this.su_rTPos == 512) {
817                     this.su_rTPos = 0;
818                 }
819             } else {
820                 this.su_rNToGo--;
821             }
822             this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
823             this.su_i2++;
824             this.currentState = RAND_PART_B_STATE;
825             this.crc.updateCRC(su_ch2Shadow);
826             return su_ch2Shadow;
827         }
828         endBlock();
829         initBlock();
830         return setupBlock();
831     }
832 
833     private int setupNoRandPartA() throws IOException {
834         if (this.su_i2 <= this.last) {
835             this.su_chPrev = this.su_ch2;
836             final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
837             this.su_ch2 = su_ch2Shadow;
838             this.su_tPos = this.data.tt[this.su_tPos];
839             this.su_i2++;
840             this.currentState = NO_RAND_PART_B_STATE;
841             this.crc.updateCRC(su_ch2Shadow);
842             return su_ch2Shadow;
843         }
844         this.currentState = NO_RAND_PART_A_STATE;
845         endBlock();
846         initBlock();
847         return setupBlock();
848     }
849 
850     private int setupRandPartB() throws IOException {
851         if (this.su_ch2 != this.su_chPrev) {
852             this.currentState = RAND_PART_A_STATE;
853             this.su_count = 1;
854             return setupRandPartA();
855         } else if (++this.su_count >= 4) {
856             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
857             this.su_tPos = this.data.tt[this.su_tPos];
858             if (this.su_rNToGo == 0) {
859                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
860                 if (++this.su_rTPos == 512) {
861                     this.su_rTPos = 0;
862                 }
863             } else {
864                 this.su_rNToGo--;
865             }
866             this.su_j2 = 0;
867             this.currentState = RAND_PART_C_STATE;
868             if (this.su_rNToGo == 1) {
869                 this.su_z ^= 1;
870             }
871             return setupRandPartC();
872         } else {
873             this.currentState = RAND_PART_A_STATE;
874             return setupRandPartA();
875         }
876     }
877 
878     private int setupRandPartC() throws IOException {
879         if (this.su_j2 < this.su_z) {
880             this.crc.updateCRC(this.su_ch2);
881             this.su_j2++;
882             return this.su_ch2;
883         }
884         this.currentState = RAND_PART_A_STATE;
885         this.su_i2++;
886         this.su_count = 0;
887         return setupRandPartA();
888     }
889 
890     private int setupNoRandPartB() throws IOException {
891         if (this.su_ch2 != this.su_chPrev) {
892             this.su_count = 1;
893             return setupNoRandPartA();
894         } else if (++this.su_count >= 4) {
895             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
896             this.su_tPos = this.data.tt[this.su_tPos];
897             this.su_j2 = 0;
898             return setupNoRandPartC();
899         } else {
900             return setupNoRandPartA();
901         }
902     }
903 
904     private int setupNoRandPartC() throws IOException {
905         if (this.su_j2 < this.su_z) {
906             final int su_ch2Shadow = this.su_ch2;
907             this.crc.updateCRC(su_ch2Shadow);
908             this.su_j2++;
909             this.currentState = NO_RAND_PART_C_STATE;
910             return su_ch2Shadow;
911         }
912         this.su_i2++;
913         this.su_count = 0;
914         return setupNoRandPartA();
915     }
916 
917     private static final class Data {
918 
919         // (with blockSize 900k)
920         final boolean[] inUse = new boolean[256]; // 256 byte
921 
922         final byte[] seqToUnseq = new byte[256]; // 256 byte
923         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
924         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
925 
926         /**
927          * Freq table collected to save a pass over the data during
928          * decompression.
929          */
930         final int[] unzftab = new int[256]; // 1024 byte
931 
932         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
933         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
934         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
935         final int[] minLens = new int[N_GROUPS]; // 24 byte
936 
937         final int[] cftab = new int[257]; // 1028 byte
938         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
939         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
940         // byte
941         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
942         // ---------------
943         // 60798 byte
944 
945         int[] tt; // 3600000 byte
946         byte[] ll8; // 900000 byte
947 
948         // ---------------
949         // 4560782 byte
950         // ===============
951 
952         Data(final int blockSize100k) {
953             this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
954         }
955 
956         /**
957          * Initializes the {@link #tt} array.
958          * 
959          * This method is called when the required length of the array is known.
960          * I don't initialize it at construction time to avoid unneccessary
961          * memory allocation when compressing small files.
962          */
963         int[] initTT(final int length) {
964             int[] ttShadow = this.tt;
965 
966             // tt.length should always be >= length, but theoretically
967             // it can happen, if the compressor mixed small and large
968             // blocks. Normally only the last block will be smaller
969             // than others.
970             if ((ttShadow == null) || (ttShadow.length < length)) {
971                 this.tt = ttShadow = new int[length];
972             }
973 
974             return ttShadow;
975         }
976 
977     }
978 
979     /**
980      * Checks if the signature matches what is expected for a bzip2 file.
981      * 
982      * @param signature
983      *            the bytes to check
984      * @param length
985      *            the number of bytes to check
986      * @return true, if this stream is a bzip2 compressed stream, false otherwise
987      * 
988      * @since 1.1
989      */
990     public static boolean matches(final byte[] signature, final int length) {
991 
992         if (length < 3) {
993             return false;
994         }
995 
996         if (signature[0] != 'B') {
997             return false;
998         }
999 
1000         if (signature[1] != 'Z') {
1001             return false;
1002         }
1003 
1004         if (signature[2] != 'h') {
1005             return false;
1006         }
1007 
1008         return true;
1009     }
1010 }