View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  /*
21   * This package is based on the work done by Keiron Liddle, Aftex Software
22   * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
23   * great code.
24   */
25  package org.apache.commons.compress.compressors.bzip2;
26  
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.nio.ByteOrder;
30  import java.util.Arrays;
31  
32  import org.apache.commons.compress.compressors.CompressorInputStream;
33  import org.apache.commons.compress.utils.BitInputStream;
34  import org.apache.commons.compress.utils.CloseShieldFilterInputStream;
35  import org.apache.commons.compress.utils.InputStreamStatistics;
36  
37  /**
38   * An input stream that decompresses from the BZip2 format to be read as any other stream.
39   *
40   * @NotThreadSafe
41   */
42  public class BZip2CompressorInputStream extends CompressorInputStream
43      implements BZip2Constants, InputStreamStatistics {
44  
45      /**
46       * Index of the last char in the block, so the block size == last + 1.
47       */
48      private int last;
49  
50      /**
51       * Index in zptr[] of original string after sorting.
52       */
53      private int origPtr;
54  
55      /**
56       * always: in the range 0 .. 9. The current block size is 100000 * this
57       * number.
58       */
59      private int blockSize100k;
60  
61      private boolean blockRandomised;
62  
63      private final CRC crc = new CRC();
64  
65      private int nInUse;
66  
67      private BitInputStream bin;
68      private final boolean decompressConcatenated;
69  
70      private static final int EOF = 0;
71      private static final int START_BLOCK_STATE = 1;
72      private static final int RAND_PART_A_STATE = 2;
73      private static final int RAND_PART_B_STATE = 3;
74      private static final int RAND_PART_C_STATE = 4;
75      private static final int NO_RAND_PART_A_STATE = 5;
76      private static final int NO_RAND_PART_B_STATE = 6;
77      private static final int NO_RAND_PART_C_STATE = 7;
78  
79      private int currentState = START_BLOCK_STATE;
80  
81      private int storedBlockCRC, storedCombinedCRC;
82      private int computedBlockCRC, computedCombinedCRC;
83  
84      // Variables used by setup* methods exclusively
85  
86      private int su_count;
87      private int su_ch2;
88      private int su_chPrev;
89      private int su_i2;
90      private int su_j2;
91      private int su_rNToGo;
92      private int su_rTPos;
93      private int su_tPos;
94      private char su_z;
95  
96      /**
97       * All memory intensive stuff. This field is initialized by initBlock().
98       */
99      private BZip2CompressorInputStream.Data data;
100 
101     /**
102      * Constructs a new BZip2CompressorInputStream which decompresses bytes
103      * read from the specified stream. This doesn't suppprt decompressing
104      * concatenated .bz2 files.
105      *
106      * @param in the InputStream from which this object should be created
107      * @throws IOException
108      *             if the stream content is malformed or an I/O error occurs.
109      * @throws NullPointerException
110      *             if {@code in == null}
111      */
112     public BZip2CompressorInputStream(final InputStream in) throws IOException {
113         this(in, false);
114     }
115 
116     /**
117      * Constructs a new BZip2CompressorInputStream which decompresses bytes
118      * read from the specified stream.
119      *
120      * @param in the InputStream from which this object should be created
121      * @param decompressConcatenated
122      *                     if true, decompress until the end of the input;
123      *                     if false, stop after the first .bz2 stream and
124      *                     leave the input position to point to the next
125      *                     byte after the .bz2 stream
126      *
127      * @throws IOException
128      *             if {@code in == null}, the stream content is malformed, or an I/O error occurs.
129      */
130     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
131         this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in,
132             ByteOrder.BIG_ENDIAN);
133         this.decompressConcatenated = decompressConcatenated;
134 
135         init(true);
136         initBlock();
137     }
138 
139     @Override
140     public int read() throws IOException {
141         if (this.bin != null) {
142             final int r = read0();
143             count(r < 0 ? -1 : 1);
144             return r;
145         }
146         throw new IOException("stream closed");
147     }
148 
149     /*
150      * (non-Javadoc)
151      *
152      * @see java.io.InputStream#read(byte[], int, int)
153      */
154     @Override
155     public int read(final byte[] dest, final int offs, final int len)
156         throws IOException {
157         if (offs < 0) {
158             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
159         }
160         if (len < 0) {
161             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
162         }
163         if (offs + len > dest.length) {
164             throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
165                                                 + len + ") > dest.length(" + dest.length + ").");
166         }
167         if (this.bin == null) {
168             throw new IOException("stream closed");
169         }
170         if (len == 0) {
171             return 0;
172         }
173 
174         final int hi = offs + len;
175         int destOffs = offs;
176         int b;
177         while (destOffs < hi && ((b = read0()) >= 0)) {
178             dest[destOffs++] = (byte) b;
179             count(1);
180         }
181 
182         return (destOffs == offs) ? -1 : (destOffs - offs);
183     }
184 
185     /**
186      * @since 1.17
187      */
188     @Override
189     public long getCompressedCount() {
190         return bin.getBytesRead();
191     }
192 
193     private void makeMaps() {
194         final boolean[] inUse = this.data.inUse;
195         final byte[] seqToUnseq = this.data.seqToUnseq;
196 
197         int nInUseShadow = 0;
198 
199         for (int i = 0; i < 256; i++) {
200             if (inUse[i]) {
201                 seqToUnseq[nInUseShadow++] = (byte) i;
202             }
203         }
204 
205         this.nInUse = nInUseShadow;
206     }
207 
208     private int read0() throws IOException {
209         switch (currentState) {
210         case EOF:
211             return -1;
212 
213         case START_BLOCK_STATE:
214             return setupBlock();
215 
216         case RAND_PART_A_STATE:
217             throw new IllegalStateException();
218 
219         case RAND_PART_B_STATE:
220             return setupRandPartB();
221 
222         case RAND_PART_C_STATE:
223             return setupRandPartC();
224 
225         case NO_RAND_PART_A_STATE:
226             throw new IllegalStateException();
227 
228         case NO_RAND_PART_B_STATE:
229             return setupNoRandPartB();
230 
231         case NO_RAND_PART_C_STATE:
232             return setupNoRandPartC();
233 
234         default:
235             throw new IllegalStateException();
236         }
237     }
238 
239     private int readNextByte(BitInputStream in) throws IOException {
240         long b = in.readBits(8);
241         return (int) b;
242     }
243 
244     private boolean init(final boolean isFirstStream) throws IOException {
245         if (null == bin) {
246             throw new IOException("No InputStream");
247         }
248 
249         if (!isFirstStream) {
250             bin.clearBitCache();
251         }
252 
253         final int magic0 = readNextByte(this.bin);
254         if (magic0 == -1 && !isFirstStream) {
255             return false;
256         }
257         final int magic1 = readNextByte(this.bin);
258         final int magic2 = readNextByte(this.bin);
259 
260         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
261             throw new IOException(isFirstStream
262                     ? "Stream is not in the BZip2 format"
263                     : "Garbage after a valid BZip2 stream");
264         }
265 
266         final int blockSize = readNextByte(this.bin);
267         if ((blockSize < '1') || (blockSize > '9')) {
268             throw new IOException("BZip2 block size is invalid");
269         }
270 
271         this.blockSize100k = blockSize - '0';
272 
273         this.computedCombinedCRC = 0;
274 
275         return true;
276     }
277 
278     private void initBlock() throws IOException {
279         BitInputStream bin = this.bin;
280         char magic0;
281         char magic1;
282         char magic2;
283         char magic3;
284         char magic4;
285         char magic5;
286 
287         while (true) {
288             // Get the block magic bytes.
289             magic0 = bsGetUByte(bin);
290             magic1 = bsGetUByte(bin);
291             magic2 = bsGetUByte(bin);
292             magic3 = bsGetUByte(bin);
293             magic4 = bsGetUByte(bin);
294             magic5 = bsGetUByte(bin);
295 
296             // If isn't end of stream magic, break out of the loop.
297             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
298                     || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
299                 break;
300             }
301 
302             // End of stream was reached. Check the combined CRC and
303             // advance to the next .bz2 stream if decoding concatenated
304             // streams.
305             if (complete()) {
306                 return;
307             }
308         }
309 
310         if (magic0 != 0x31 || // '1'
311             magic1 != 0x41 || // ')'
312             magic2 != 0x59 || // 'Y'
313             magic3 != 0x26 || // '&'
314             magic4 != 0x53 || // 'S'
315             magic5 != 0x59 // 'Y'
316             ) {
317             this.currentState = EOF;
318             throw new IOException("bad block header");
319         }
320         this.storedBlockCRC = bsGetInt(bin);
321         this.blockRandomised = bsR(bin, 1) == 1;
322 
323         /**
324          * Allocate data here instead in constructor, so we do not allocate
325          * it if the input file is empty.
326          */
327         if (this.data == null) {
328             this.data = new Data(this.blockSize100k);
329         }
330 
331         // currBlockNo++;
332         getAndMoveToFrontDecode();
333 
334         this.crc.initialiseCRC();
335         this.currentState = START_BLOCK_STATE;
336     }
337 
338     private void endBlock() throws IOException {
339         this.computedBlockCRC = this.crc.getFinalCRC();
340 
341         // A bad CRC is considered a fatal error.
342         if (this.storedBlockCRC != this.computedBlockCRC) {
343             // make next blocks readable without error
344             // (repair feature, not yet documented, not tested)
345             this.computedCombinedCRC = (this.storedCombinedCRC << 1)
346                 | (this.storedCombinedCRC >>> 31);
347             this.computedCombinedCRC ^= this.storedBlockCRC;
348 
349             throw new IOException("BZip2 CRC error");
350         }
351 
352         this.computedCombinedCRC = (this.computedCombinedCRC << 1)
353             | (this.computedCombinedCRC >>> 31);
354         this.computedCombinedCRC ^= this.computedBlockCRC;
355     }
356 
357     private boolean complete() throws IOException {
358         this.storedCombinedCRC = bsGetInt(bin);
359         this.currentState = EOF;
360         this.data = null;
361 
362         if (this.storedCombinedCRC != this.computedCombinedCRC) {
363             throw new IOException("BZip2 CRC error");
364         }
365 
366         // Look for the next .bz2 stream if decompressing
367         // concatenated files.
368         return !decompressConcatenated || !init(false);
369     }
370 
371     @Override
372     public void close() throws IOException {
373         final BitInputStream inShadow = this.bin;
374         if (inShadow != null) {
375             try {
376                 inShadow.close();
377             } finally {
378                 this.data = null;
379                 this.bin = null;
380             }
381         }
382     }
383 
384     /**
385      * read bits from the input stream
386      * @param n the number of bits to read, must not exceed 32?
387      * @return the requested bits combined into an int
388      * @throws IOException
389      */
390     private static int bsR(BitInputStream bin, final int n) throws IOException {
391         long thech = bin.readBits(n);
392         if (thech < 0) {
393             throw new IOException("unexpected end of stream");
394         }
395         return (int) thech;
396     }
397 
398     private static boolean bsGetBit(BitInputStream bin) throws IOException {
399         return bsR(bin, 1) != 0;
400     }
401 
402     private static char bsGetUByte(BitInputStream bin) throws IOException {
403         return (char) bsR(bin, 8);
404     }
405 
406     private static int bsGetInt(BitInputStream bin) throws IOException {
407         return bsR(bin, 32);
408     }
409 
410     private static void checkBounds(final int checkVal, final int limitExclusive, String name)
411         throws IOException {
412         if (checkVal < 0) {
413             throw new IOException("Corrupted input, " + name + " value negative");
414         }
415         if (checkVal >= limitExclusive) {
416             throw new IOException("Corrupted input, " + name + " value too big");
417         }
418     }
419 
420     /**
421      * Called by createHuffmanDecodingTables() exclusively.
422      */
423     private static void hbCreateDecodeTables(final int[] limit,
424                                              final int[] base, final int[] perm, final char[] length,
425                                              final int minLen, final int maxLen, final int alphaSize)
426         throws IOException {
427         for (int i = minLen, pp = 0; i <= maxLen; i++) {
428             for (int j = 0; j < alphaSize; j++) {
429                 if (length[j] == i) {
430                     perm[pp++] = j;
431                 }
432             }
433         }
434 
435         for (int i = MAX_CODE_LEN; --i > 0;) {
436             base[i] = 0;
437             limit[i] = 0;
438         }
439 
440         for (int i = 0; i < alphaSize; i++) {
441             final int l = length[i];
442             checkBounds(l, MAX_ALPHA_SIZE, "length");
443             base[l + 1]++;
444         }
445 
446         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
447             b += base[i];
448             base[i] = b;
449         }
450 
451         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
452             final int nb = base[i + 1];
453             vec += nb - b;
454             b = nb;
455             limit[i] = vec - 1;
456             vec <<= 1;
457         }
458 
459         for (int i = minLen + 1; i <= maxLen; i++) {
460             base[i] = ((limit[i - 1] + 1) << 1) - base[i];
461         }
462     }
463 
464     private void recvDecodingTables() throws IOException {
465         final BitInputStream bin = this.bin;
466         final Data dataShadow = this.data;
467         final boolean[] inUse = dataShadow.inUse;
468         final byte[] pos = dataShadow.recvDecodingTables_pos;
469         final byte[] selector = dataShadow.selector;
470         final byte[] selectorMtf = dataShadow.selectorMtf;
471 
472         int inUse16 = 0;
473 
474         /* Receive the mapping table */
475         for (int i = 0; i < 16; i++) {
476             if (bsGetBit(bin)) {
477                 inUse16 |= 1 << i;
478             }
479         }
480 
481         Arrays.fill(inUse, false);
482         for (int i = 0; i < 16; i++) {
483             if ((inUse16 & (1 << i)) != 0) {
484                 final int i16 = i << 4;
485                 for (int j = 0; j < 16; j++) {
486                     if (bsGetBit(bin)) {
487                         inUse[i16 + j] = true;
488                     }
489                 }
490             }
491         }
492 
493         makeMaps();
494         final int alphaSize = this.nInUse + 2;
495         /* Now the selectors */
496         final int nGroups = bsR(bin, 3);
497         final int nSelectors = bsR(bin, 15);
498         checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize");
499         checkBounds(nGroups, N_GROUPS + 1, "nGroups");
500         checkBounds(nSelectors, MAX_SELECTORS + 1, "nSelectors");
501 
502         for (int i = 0; i < nSelectors; i++) {
503             int j = 0;
504             while (bsGetBit(bin)) {
505                 j++;
506             }
507             selectorMtf[i] = (byte) j;
508         }
509 
510         /* Undo the MTF values for the selectors. */
511         for (int v = nGroups; --v >= 0;) {
512             pos[v] = (byte) v;
513         }
514 
515         for (int i = 0; i < nSelectors; i++) {
516             int v = selectorMtf[i] & 0xff;
517             checkBounds(v, N_GROUPS, "selectorMtf");
518             final byte tmp = pos[v];
519             while (v > 0) {
520                 // nearly all times v is zero, 4 in most other cases
521                 pos[v] = pos[v - 1];
522                 v--;
523             }
524             pos[0] = tmp;
525             selector[i] = tmp;
526         }
527 
528         final char[][] len = dataShadow.temp_charArray2d;
529 
530         /* Now the coding tables */
531         for (int t = 0; t < nGroups; t++) {
532             int curr = bsR(bin, 5);
533             final char[] len_t = len[t];
534             for (int i = 0; i < alphaSize; i++) {
535                 while (bsGetBit(bin)) {
536                     curr += bsGetBit(bin) ? -1 : 1;
537                 }
538                 len_t[i] = (char) curr;
539             }
540         }
541 
542         // finally create the Huffman tables
543         createHuffmanDecodingTables(alphaSize, nGroups);
544     }
545 
546     /**
547      * Called by recvDecodingTables() exclusively.
548      */
549     private void createHuffmanDecodingTables(final int alphaSize,
550                                              final int nGroups) throws IOException {
551         final Data dataShadow = this.data;
552         final char[][] len = dataShadow.temp_charArray2d;
553         final int[] minLens = dataShadow.minLens;
554         final int[][] limit = dataShadow.limit;
555         final int[][] base = dataShadow.base;
556         final int[][] perm = dataShadow.perm;
557 
558         for (int t = 0; t < nGroups; t++) {
559             int minLen = 32;
560             int maxLen = 0;
561             final char[] len_t = len[t];
562             for (int i = alphaSize; --i >= 0;) {
563                 final char lent = len_t[i];
564                 if (lent > maxLen) {
565                     maxLen = lent;
566                 }
567                 if (lent < minLen) {
568                     minLen = lent;
569                 }
570             }
571             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
572                                  maxLen, alphaSize);
573             minLens[t] = minLen;
574         }
575     }
576 
577     private void getAndMoveToFrontDecode() throws IOException {
578         final BitInputStream bin = this.bin;
579         this.origPtr = bsR(bin, 24);
580         recvDecodingTables();
581 
582         final Data dataShadow = this.data;
583         final byte[] ll8 = dataShadow.ll8;
584         final int[] unzftab = dataShadow.unzftab;
585         final byte[] selector = dataShadow.selector;
586         final byte[] seqToUnseq = dataShadow.seqToUnseq;
587         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
588         final int[] minLens = dataShadow.minLens;
589         final int[][] limit = dataShadow.limit;
590         final int[][] base = dataShadow.base;
591         final int[][] perm = dataShadow.perm;
592         final int limitLast = this.blockSize100k * 100000;
593 
594         /*
595          * Setting up the unzftab entries here is not strictly necessary, but it
596          * does save having to do it later in a separate pass, and so saves a
597          * block's worth of cache misses.
598          */
599         for (int i = 256; --i >= 0;) {
600             yy[i] = (char) i;
601             unzftab[i] = 0;
602         }
603 
604         int groupNo = 0;
605         int groupPos = G_SIZE - 1;
606         final int eob = this.nInUse + 1;
607         int nextSym = getAndMoveToFrontDecode0();
608         int lastShadow = -1;
609         int zt = selector[groupNo] & 0xff;
610         checkBounds(zt, N_GROUPS, "zt");
611         int[] base_zt = base[zt];
612         int[] limit_zt = limit[zt];
613         int[] perm_zt = perm[zt];
614         int minLens_zt = minLens[zt];
615 
616         while (nextSym != eob) {
617             if ((nextSym == RUNA) || (nextSym == RUNB)) {
618                 int s = -1;
619 
620                 for (int n = 1; true; n <<= 1) {
621                     if (nextSym == RUNA) {
622                         s += n;
623                     } else if (nextSym == RUNB) {
624                         s += n << 1;
625                     } else {
626                         break;
627                     }
628 
629                     if (groupPos == 0) {
630                         groupPos = G_SIZE - 1;
631                         checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
632                         zt = selector[groupNo] & 0xff;
633                         checkBounds(zt, N_GROUPS, "zt");
634                         base_zt = base[zt];
635                         limit_zt = limit[zt];
636                         perm_zt = perm[zt];
637                         minLens_zt = minLens[zt];
638                     } else {
639                         groupPos--;
640                     }
641 
642                     int zn = minLens_zt;
643                     checkBounds(zn, MAX_ALPHA_SIZE, "zn");
644                     int zvec = bsR(bin, zn);
645                     while(zvec > limit_zt[zn]) {
646                         checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
647                         zvec = (zvec << 1) | bsR(bin, 1);
648                     }
649                     final int tmp = zvec - base_zt[zn];
650                     checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
651                     nextSym = perm_zt[tmp];
652                 }
653 
654                 final int yy0 = yy[0];
655                 checkBounds(yy0, 256, "yy");
656                 final byte ch = seqToUnseq[yy0];
657                 unzftab[ch & 0xff] += s + 1;
658 
659                 while (s-- >= 0) {
660                     ll8[++lastShadow] = ch;
661                 }
662 
663                 if (lastShadow >= limitLast) {
664                     throw new IOException("block overrun");
665                 }
666             } else {
667                 if (++lastShadow >= limitLast) {
668                     throw new IOException("block overrun");
669                 }
670                 checkBounds(nextSym, 256 + 1, "nextSym");
671 
672                 final char tmp = yy[nextSym - 1];
673                 checkBounds(tmp, 256, "yy");
674                 unzftab[seqToUnseq[tmp] & 0xff]++;
675                 ll8[lastShadow] = seqToUnseq[tmp];
676 
677                 /*
678                  * This loop is hammered during decompression, hence avoid
679                  * native method call overhead of System.arraycopy for very
680                  * small ranges to copy.
681                  */
682                 if (nextSym <= 16) {
683                     for (int j = nextSym - 1; j > 0;) {
684                         yy[j] = yy[--j];
685                     }
686                 } else {
687                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
688                 }
689 
690                 yy[0] = tmp;
691 
692                 if (groupPos == 0) {
693                     groupPos = G_SIZE - 1;
694                     checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
695                     zt = selector[groupNo] & 0xff;
696                     checkBounds(zt, N_GROUPS, "zt");
697                     base_zt = base[zt];
698                     limit_zt = limit[zt];
699                     perm_zt = perm[zt];
700                     minLens_zt = minLens[zt];
701                 } else {
702                     groupPos--;
703                 }
704 
705                 int zn = minLens_zt;
706                 checkBounds(zn, MAX_ALPHA_SIZE, "zn");
707                 int zvec = bsR(bin, zn);
708                 while(zvec > limit_zt[zn]) {
709                     checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
710                     zvec = (zvec << 1) | bsR(bin, 1);
711                 }
712                 final int idx = zvec - base_zt[zn];
713                 checkBounds(idx, MAX_ALPHA_SIZE, "zvec");
714                 nextSym = perm_zt[idx];
715             }
716         }
717 
718         this.last = lastShadow;
719     }
720 
721     private int getAndMoveToFrontDecode0() throws IOException {
722         final Data dataShadow = this.data;
723         final int zt = dataShadow.selector[0] & 0xff;
724         checkBounds(zt, N_GROUPS, "zt");
725         final int[] limit_zt = dataShadow.limit[zt];
726         int zn = dataShadow.minLens[zt];
727         checkBounds(zn, MAX_ALPHA_SIZE, "zn");
728         int zvec = bsR(bin, zn);
729         while (zvec > limit_zt[zn]) {
730             checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
731             zvec = (zvec << 1) | bsR(bin, 1);
732         }
733         final int tmp = zvec - dataShadow.base[zt][zn];
734         checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
735 
736         return dataShadow.perm[zt][tmp];
737     }
738 
739     private int setupBlock() throws IOException {
740         if (currentState == EOF || this.data == null) {
741             return -1;
742         }
743 
744         final int[] cftab = this.data.cftab;
745         final int ttLen = this.last + 1;
746         final int[] tt = this.data.initTT(ttLen);
747         final byte[] ll8 = this.data.ll8;
748         cftab[0] = 0;
749         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
750 
751         for (int i = 1, c = cftab[0]; i <= 256; i++) {
752             c += cftab[i];
753             cftab[i] = c;
754         }
755 
756         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
757             final int tmp = cftab[ll8[i] & 0xff]++;
758             checkBounds(tmp, ttLen, "tt index");
759             tt[tmp] = i;
760         }
761 
762         if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
763             throw new IOException("stream corrupted");
764         }
765 
766         this.su_tPos = tt[this.origPtr];
767         this.su_count = 0;
768         this.su_i2 = 0;
769         this.su_ch2 = 256; /* not a char and not EOF */
770 
771         if (this.blockRandomised) {
772             this.su_rNToGo = 0;
773             this.su_rTPos = 0;
774             return setupRandPartA();
775         }
776         return setupNoRandPartA();
777     }
778 
779     private int setupRandPartA() throws IOException {
780         if (this.su_i2 <= this.last) {
781             this.su_chPrev = this.su_ch2;
782             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
783             checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
784             this.su_tPos = this.data.tt[this.su_tPos];
785             if (this.su_rNToGo == 0) {
786                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
787                 if (++this.su_rTPos == 512) {
788                     this.su_rTPos = 0;
789                 }
790             } else {
791                 this.su_rNToGo--;
792             }
793             this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
794             this.su_i2++;
795             this.currentState = RAND_PART_B_STATE;
796             this.crc.updateCRC(su_ch2Shadow);
797             return su_ch2Shadow;
798         }
799         endBlock();
800         initBlock();
801         return setupBlock();
802     }
803 
804     private int setupNoRandPartA() throws IOException {
805         if (this.su_i2 <= this.last) {
806             this.su_chPrev = this.su_ch2;
807             final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
808             this.su_ch2 = su_ch2Shadow;
809             checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
810             this.su_tPos = this.data.tt[this.su_tPos];
811             this.su_i2++;
812             this.currentState = NO_RAND_PART_B_STATE;
813             this.crc.updateCRC(su_ch2Shadow);
814             return su_ch2Shadow;
815         }
816         this.currentState = NO_RAND_PART_A_STATE;
817         endBlock();
818         initBlock();
819         return setupBlock();
820     }
821 
822     private int setupRandPartB() throws IOException {
823         if (this.su_ch2 != this.su_chPrev) {
824             this.currentState = RAND_PART_A_STATE;
825             this.su_count = 1;
826             return setupRandPartA();
827         } else if (++this.su_count >= 4) {
828             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
829             checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
830             this.su_tPos = this.data.tt[this.su_tPos];
831             if (this.su_rNToGo == 0) {
832                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
833                 if (++this.su_rTPos == 512) {
834                     this.su_rTPos = 0;
835                 }
836             } else {
837                 this.su_rNToGo--;
838             }
839             this.su_j2 = 0;
840             this.currentState = RAND_PART_C_STATE;
841             if (this.su_rNToGo == 1) {
842                 this.su_z ^= 1;
843             }
844             return setupRandPartC();
845         } else {
846             this.currentState = RAND_PART_A_STATE;
847             return setupRandPartA();
848         }
849     }
850 
851     private int setupRandPartC() throws IOException {
852         if (this.su_j2 < this.su_z) {
853             this.crc.updateCRC(this.su_ch2);
854             this.su_j2++;
855             return this.su_ch2;
856         }
857         this.currentState = RAND_PART_A_STATE;
858         this.su_i2++;
859         this.su_count = 0;
860         return setupRandPartA();
861     }
862 
863     private int setupNoRandPartB() throws IOException {
864         if (this.su_ch2 != this.su_chPrev) {
865             this.su_count = 1;
866             return setupNoRandPartA();
867         } else if (++this.su_count >= 4) {
868             checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos");
869             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
870             this.su_tPos = this.data.tt[this.su_tPos];
871             this.su_j2 = 0;
872             return setupNoRandPartC();
873         } else {
874             return setupNoRandPartA();
875         }
876     }
877 
878     private int setupNoRandPartC() throws IOException {
879         if (this.su_j2 < this.su_z) {
880             final int su_ch2Shadow = this.su_ch2;
881             this.crc.updateCRC(su_ch2Shadow);
882             this.su_j2++;
883             this.currentState = NO_RAND_PART_C_STATE;
884             return su_ch2Shadow;
885         }
886         this.su_i2++;
887         this.su_count = 0;
888         return setupNoRandPartA();
889     }
890 
891     private static final class Data {
892 
893         // (with blockSize 900k)
894         final boolean[] inUse = new boolean[256]; // 256 byte
895 
896         final byte[] seqToUnseq = new byte[256]; // 256 byte
897         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
898         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
899 
900         /**
901          * Freq table collected to save a pass over the data during
902          * decompression.
903          */
904         final int[] unzftab = new int[256]; // 1024 byte
905 
906         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
907         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
908         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
909         final int[] minLens = new int[N_GROUPS]; // 24 byte
910 
911         final int[] cftab = new int[257]; // 1028 byte
912         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
913         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
914         // byte
915         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
916         // ---------------
917         // 60798 byte
918 
919         int[] tt; // 3600000 byte
920         byte[] ll8; // 900000 byte
921 
922         // ---------------
923         // 4560782 byte
924         // ===============
925 
926         Data(final int blockSize100k) {
927             this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
928         }
929 
930         /**
931          * Initializes the {@link #tt} array.
932          *
933          * This method is called when the required length of the array is known.
934          * I don't initialize it at construction time to avoid unneccessary
935          * memory allocation when compressing small files.
936          */
937         int[] initTT(final int length) {
938             int[] ttShadow = this.tt;
939 
940             // tt.length should always be >= length, but theoretically
941             // it can happen, if the compressor mixed small and large
942             // blocks. Normally only the last block will be smaller
943             // than others.
944             if ((ttShadow == null) || (ttShadow.length < length)) {
945                 this.tt = ttShadow = new int[length];
946             }
947 
948             return ttShadow;
949         }
950 
951     }
952 
953     /**
954      * Checks if the signature matches what is expected for a bzip2 file.
955      *
956      * @param signature
957      *            the bytes to check
958      * @param length
959      *            the number of bytes to check
960      * @return true, if this stream is a bzip2 compressed stream, false otherwise
961      *
962      * @since 1.1
963      */
964     public static boolean matches(final byte[] signature, final int length) {
965         return length >= 3 && signature[0] == 'B' &&
966                 signature[1] == 'Z' && signature[2] == 'h';
967     }
968 }