001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.ByteOrder; 030import java.util.Arrays; 031 032import org.apache.commons.compress.compressors.CompressorInputStream; 033import org.apache.commons.compress.utils.BitInputStream; 034import org.apache.commons.compress.utils.CloseShieldFilterInputStream; 035import org.apache.commons.compress.utils.InputStreamStatistics; 036 037/** 038 * An input stream that decompresses from the BZip2 format to be read as any other stream. 039 * 040 * @NotThreadSafe 041 */ 042public class BZip2CompressorInputStream extends CompressorInputStream 043 implements BZip2Constants, InputStreamStatistics { 044 045 /** 046 * Index of the last char in the block, so the block size == last + 1. 047 */ 048 private int last; 049 050 /** 051 * Index in zptr[] of original string after sorting. 052 */ 053 private int origPtr; 054 055 /** 056 * always: in the range 0 .. 9. The current block size is 100000 * this 057 * number. 058 */ 059 private int blockSize100k; 060 061 private boolean blockRandomised; 062 063 private final CRC crc = new CRC(); 064 065 private int nInUse; 066 067 private BitInputStream bin; 068 private final boolean decompressConcatenated; 069 070 private static final int EOF = 0; 071 private static final int START_BLOCK_STATE = 1; 072 private static final int RAND_PART_A_STATE = 2; 073 private static final int RAND_PART_B_STATE = 3; 074 private static final int RAND_PART_C_STATE = 4; 075 private static final int NO_RAND_PART_A_STATE = 5; 076 private static final int NO_RAND_PART_B_STATE = 6; 077 private static final int NO_RAND_PART_C_STATE = 7; 078 079 private int currentState = START_BLOCK_STATE; 080 081 private int storedBlockCRC, storedCombinedCRC; 082 private int computedBlockCRC, computedCombinedCRC; 083 084 // Variables used by setup* methods exclusively 085 086 private int su_count; 087 private int su_ch2; 088 private int su_chPrev; 089 private int su_i2; 090 private int su_j2; 091 private int su_rNToGo; 092 private int su_rTPos; 093 private int su_tPos; 094 private char su_z; 095 096 /** 097 * All memory intensive stuff. This field is initialized by initBlock(). 098 */ 099 private BZip2CompressorInputStream.Data data; 100 101 /** 102 * Constructs a new BZip2CompressorInputStream which decompresses bytes 103 * read from the specified stream. This doesn't support decompressing 104 * concatenated .bz2 files. 105 * 106 * @param in the InputStream from which this object should be created 107 * @throws IOException 108 * if the stream content is malformed or an I/O error occurs. 109 * @throws NullPointerException 110 * if {@code in == null} 111 */ 112 public BZip2CompressorInputStream(final InputStream in) throws IOException { 113 this(in, false); 114 } 115 116 /** 117 * Constructs a new BZip2CompressorInputStream which decompresses bytes 118 * read from the specified stream. 119 * 120 * @param in the InputStream from which this object should be created 121 * @param decompressConcatenated 122 * if true, decompress until the end of the input; 123 * if false, stop after the first .bz2 stream and 124 * leave the input position to point to the next 125 * byte after the .bz2 stream 126 * 127 * @throws IOException 128 * if {@code in == null}, the stream content is malformed, or an I/O error occurs. 129 */ 130 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 131 this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in, 132 ByteOrder.BIG_ENDIAN); 133 this.decompressConcatenated = decompressConcatenated; 134 135 init(true); 136 initBlock(); 137 } 138 139 @Override 140 public int read() throws IOException { 141 if (this.bin != null) { 142 final int r = read0(); 143 count(r < 0 ? -1 : 1); 144 return r; 145 } 146 throw new IOException("Stream closed"); 147 } 148 149 /* 150 * (non-Javadoc) 151 * 152 * @see java.io.InputStream#read(byte[], int, int) 153 */ 154 @Override 155 public int read(final byte[] dest, final int offs, final int len) 156 throws IOException { 157 if (offs < 0) { 158 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 159 } 160 if (len < 0) { 161 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 162 } 163 if (offs + len > dest.length) { 164 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 165 + len + ") > dest.length(" + dest.length + ")."); 166 } 167 if (this.bin == null) { 168 throw new IOException("Stream closed"); 169 } 170 if (len == 0) { 171 return 0; 172 } 173 174 final int hi = offs + len; 175 int destOffs = offs; 176 int b; 177 while (destOffs < hi && ((b = read0()) >= 0)) { 178 dest[destOffs++] = (byte) b; 179 count(1); 180 } 181 182 return (destOffs == offs) ? -1 : (destOffs - offs); 183 } 184 185 /** 186 * @since 1.17 187 */ 188 @Override 189 public long getCompressedCount() { 190 return bin.getBytesRead(); 191 } 192 193 private void makeMaps() { 194 final boolean[] inUse = this.data.inUse; 195 final byte[] seqToUnseq = this.data.seqToUnseq; 196 197 int nInUseShadow = 0; 198 199 for (int i = 0; i < 256; i++) { 200 if (inUse[i]) { 201 seqToUnseq[nInUseShadow++] = (byte) i; 202 } 203 } 204 205 this.nInUse = nInUseShadow; 206 } 207 208 private int read0() throws IOException { 209 switch (currentState) { 210 case EOF: 211 return -1; 212 213 case START_BLOCK_STATE: 214 return setupBlock(); 215 216 case RAND_PART_A_STATE: 217 throw new IllegalStateException(); 218 219 case RAND_PART_B_STATE: 220 return setupRandPartB(); 221 222 case RAND_PART_C_STATE: 223 return setupRandPartC(); 224 225 case NO_RAND_PART_A_STATE: 226 throw new IllegalStateException(); 227 228 case NO_RAND_PART_B_STATE: 229 return setupNoRandPartB(); 230 231 case NO_RAND_PART_C_STATE: 232 return setupNoRandPartC(); 233 234 default: 235 throw new IllegalStateException(); 236 } 237 } 238 239 private int readNextByte(final BitInputStream in) throws IOException { 240 final long b = in.readBits(8); 241 return (int) b; 242 } 243 244 private boolean init(final boolean isFirstStream) throws IOException { 245 if (null == bin) { 246 throw new IOException("No InputStream"); 247 } 248 249 if (!isFirstStream) { 250 bin.clearBitCache(); 251 } 252 253 final int magic0 = readNextByte(this.bin); 254 if (magic0 == -1 && !isFirstStream) { 255 return false; 256 } 257 final int magic1 = readNextByte(this.bin); 258 final int magic2 = readNextByte(this.bin); 259 260 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 261 throw new IOException(isFirstStream 262 ? "Stream is not in the BZip2 format" 263 : "Garbage after a valid BZip2 stream"); 264 } 265 266 final int blockSize = readNextByte(this.bin); 267 if ((blockSize < '1') || (blockSize > '9')) { 268 throw new IOException("BZip2 block size is invalid"); 269 } 270 271 this.blockSize100k = blockSize - '0'; 272 273 this.computedCombinedCRC = 0; 274 275 return true; 276 } 277 278 private void initBlock() throws IOException { 279 final BitInputStream bin = this.bin; 280 char magic0; 281 char magic1; 282 char magic2; 283 char magic3; 284 char magic4; 285 char magic5; 286 287 while (true) { 288 // Get the block magic bytes. 289 magic0 = bsGetUByte(bin); 290 magic1 = bsGetUByte(bin); 291 magic2 = bsGetUByte(bin); 292 magic3 = bsGetUByte(bin); 293 magic4 = bsGetUByte(bin); 294 magic5 = bsGetUByte(bin); 295 296 // If isn't end of stream magic, break out of the loop. 297 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 298 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 299 break; 300 } 301 302 // End of stream was reached. Check the combined CRC and 303 // advance to the next .bz2 stream if decoding concatenated 304 // streams. 305 if (complete()) { 306 return; 307 } 308 } 309 310 if (magic0 != 0x31 || // '1' 311 magic1 != 0x41 || // ')' 312 magic2 != 0x59 || // 'Y' 313 magic3 != 0x26 || // '&' 314 magic4 != 0x53 || // 'S' 315 magic5 != 0x59 // 'Y' 316 ) { 317 this.currentState = EOF; 318 throw new IOException("Bad block header"); 319 } 320 this.storedBlockCRC = bsGetInt(bin); 321 this.blockRandomised = bsR(bin, 1) == 1; 322 323 /* 324 * Allocate data here instead in constructor, so we do not allocate 325 * it if the input file is empty. 326 */ 327 if (this.data == null) { 328 this.data = new Data(this.blockSize100k); 329 } 330 331 // currBlockNo++; 332 getAndMoveToFrontDecode(); 333 334 this.crc.initializeCRC(); 335 this.currentState = START_BLOCK_STATE; 336 } 337 338 private void endBlock() throws IOException { 339 this.computedBlockCRC = this.crc.getFinalCRC(); 340 341 // A bad CRC is considered a fatal error. 342 if (this.storedBlockCRC != this.computedBlockCRC) { 343 // make next blocks readable without error 344 // (repair feature, not yet documented, not tested) 345 this.computedCombinedCRC = (this.storedCombinedCRC << 1) 346 | (this.storedCombinedCRC >>> 31); 347 this.computedCombinedCRC ^= this.storedBlockCRC; 348 349 throw new IOException("BZip2 CRC error"); 350 } 351 352 this.computedCombinedCRC = (this.computedCombinedCRC << 1) 353 | (this.computedCombinedCRC >>> 31); 354 this.computedCombinedCRC ^= this.computedBlockCRC; 355 } 356 357 private boolean complete() throws IOException { 358 this.storedCombinedCRC = bsGetInt(bin); 359 this.currentState = EOF; 360 this.data = null; 361 362 if (this.storedCombinedCRC != this.computedCombinedCRC) { 363 throw new IOException("BZip2 CRC error"); 364 } 365 366 // Look for the next .bz2 stream if decompressing 367 // concatenated files. 368 return !decompressConcatenated || !init(false); 369 } 370 371 @Override 372 public void close() throws IOException { 373 final BitInputStream inShadow = this.bin; 374 if (inShadow != null) { 375 try { 376 inShadow.close(); 377 } finally { 378 this.data = null; 379 this.bin = null; 380 } 381 } 382 } 383 384 /** 385 * read bits from the input stream 386 * @param n the number of bits to read, must not exceed 32? 387 * @return the requested bits combined into an int 388 * @throws IOException 389 */ 390 private static int bsR(final BitInputStream bin, final int n) throws IOException { 391 final long thech = bin.readBits(n); 392 if (thech < 0) { 393 throw new IOException("Unexpected end of stream"); 394 } 395 return (int) thech; 396 } 397 398 private static boolean bsGetBit(final BitInputStream bin) throws IOException { 399 return bsR(bin, 1) != 0; 400 } 401 402 private static char bsGetUByte(final BitInputStream bin) throws IOException { 403 return (char) bsR(bin, 8); 404 } 405 406 private static int bsGetInt(final BitInputStream bin) throws IOException { 407 return bsR(bin, 32); 408 } 409 410 private static void checkBounds(final int checkVal, final int limitExclusive, final String name) 411 throws IOException { 412 if (checkVal < 0) { 413 throw new IOException("Corrupted input, " + name + " value negative"); 414 } 415 if (checkVal >= limitExclusive) { 416 throw new IOException("Corrupted input, " + name + " value too big"); 417 } 418 } 419 420 /** 421 * Called by createHuffmanDecodingTables() exclusively. 422 */ 423 private static void hbCreateDecodeTables(final int[] limit, 424 final int[] base, final int[] perm, final char[] length, 425 final int minLen, final int maxLen, final int alphaSize) 426 throws IOException { 427 for (int i = minLen, pp = 0; i <= maxLen; i++) { 428 for (int j = 0; j < alphaSize; j++) { 429 if (length[j] == i) { 430 perm[pp++] = j; 431 } 432 } 433 } 434 435 for (int i = MAX_CODE_LEN; --i > 0;) { 436 base[i] = 0; 437 limit[i] = 0; 438 } 439 440 for (int i = 0; i < alphaSize; i++) { 441 final int l = length[i]; 442 checkBounds(l, MAX_ALPHA_SIZE, "length"); 443 base[l + 1]++; 444 } 445 446 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 447 b += base[i]; 448 base[i] = b; 449 } 450 451 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 452 final int nb = base[i + 1]; 453 vec += nb - b; 454 b = nb; 455 limit[i] = vec - 1; 456 vec <<= 1; 457 } 458 459 for (int i = minLen + 1; i <= maxLen; i++) { 460 base[i] = ((limit[i - 1] + 1) << 1) - base[i]; 461 } 462 } 463 464 private void recvDecodingTables() throws IOException { 465 final BitInputStream bin = this.bin; 466 final Data dataShadow = this.data; 467 final boolean[] inUse = dataShadow.inUse; 468 final byte[] pos = dataShadow.recvDecodingTables_pos; 469 final byte[] selector = dataShadow.selector; 470 final byte[] selectorMtf = dataShadow.selectorMtf; 471 472 int inUse16 = 0; 473 474 /* Receive the mapping table */ 475 for (int i = 0; i < 16; i++) { 476 if (bsGetBit(bin)) { 477 inUse16 |= 1 << i; 478 } 479 } 480 481 Arrays.fill(inUse, false); 482 for (int i = 0; i < 16; i++) { 483 if ((inUse16 & (1 << i)) != 0) { 484 final int i16 = i << 4; 485 for (int j = 0; j < 16; j++) { 486 if (bsGetBit(bin)) { 487 inUse[i16 + j] = true; 488 } 489 } 490 } 491 } 492 493 makeMaps(); 494 final int alphaSize = this.nInUse + 2; 495 /* Now the selectors */ 496 final int nGroups = bsR(bin, 3); 497 final int selectors = bsR(bin, 15); 498 if (selectors < 0) { 499 throw new IOException("Corrupted input, nSelectors value negative"); 500 } 501 checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize"); 502 checkBounds(nGroups, N_GROUPS + 1, "nGroups"); 503 504 // Don't fail on nSelectors overflowing boundaries but discard the values in overflow 505 // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/ 506 // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html 507 508 for (int i = 0; i < selectors; i++) { 509 int j = 0; 510 while (bsGetBit(bin)) { 511 j++; 512 } 513 if (i < MAX_SELECTORS) { 514 selectorMtf[i] = (byte) j; 515 } 516 } 517 final int nSelectors = Math.min(selectors, MAX_SELECTORS); 518 519 /* Undo the MTF values for the selectors. */ 520 for (int v = nGroups; --v >= 0;) { 521 pos[v] = (byte) v; 522 } 523 524 for (int i = 0; i < nSelectors; i++) { 525 int v = selectorMtf[i] & 0xff; 526 checkBounds(v, N_GROUPS, "selectorMtf"); 527 final byte tmp = pos[v]; 528 while (v > 0) { 529 // nearly all times v is zero, 4 in most other cases 530 pos[v] = pos[v - 1]; 531 v--; 532 } 533 pos[0] = tmp; 534 selector[i] = tmp; 535 } 536 537 final char[][] len = dataShadow.temp_charArray2d; 538 539 /* Now the coding tables */ 540 for (int t = 0; t < nGroups; t++) { 541 int curr = bsR(bin, 5); 542 final char[] len_t = len[t]; 543 for (int i = 0; i < alphaSize; i++) { 544 while (bsGetBit(bin)) { 545 curr += bsGetBit(bin) ? -1 : 1; 546 } 547 len_t[i] = (char) curr; 548 } 549 } 550 551 // finally create the Huffman tables 552 createHuffmanDecodingTables(alphaSize, nGroups); 553 } 554 555 /** 556 * Called by recvDecodingTables() exclusively. 557 */ 558 private void createHuffmanDecodingTables(final int alphaSize, 559 final int nGroups) throws IOException { 560 final Data dataShadow = this.data; 561 final char[][] len = dataShadow.temp_charArray2d; 562 final int[] minLens = dataShadow.minLens; 563 final int[][] limit = dataShadow.limit; 564 final int[][] base = dataShadow.base; 565 final int[][] perm = dataShadow.perm; 566 567 for (int t = 0; t < nGroups; t++) { 568 int minLen = 32; 569 int maxLen = 0; 570 final char[] len_t = len[t]; 571 for (int i = alphaSize; --i >= 0;) { 572 final char lent = len_t[i]; 573 if (lent > maxLen) { 574 maxLen = lent; 575 } 576 if (lent < minLen) { 577 minLen = lent; 578 } 579 } 580 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, 581 maxLen, alphaSize); 582 minLens[t] = minLen; 583 } 584 } 585 586 private void getAndMoveToFrontDecode() throws IOException { 587 final BitInputStream bin = this.bin; 588 this.origPtr = bsR(bin, 24); 589 recvDecodingTables(); 590 591 final Data dataShadow = this.data; 592 final byte[] ll8 = dataShadow.ll8; 593 final int[] unzftab = dataShadow.unzftab; 594 final byte[] selector = dataShadow.selector; 595 final byte[] seqToUnseq = dataShadow.seqToUnseq; 596 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 597 final int[] minLens = dataShadow.minLens; 598 final int[][] limit = dataShadow.limit; 599 final int[][] base = dataShadow.base; 600 final int[][] perm = dataShadow.perm; 601 final int limitLast = this.blockSize100k * 100000; 602 603 /* 604 * Setting up the unzftab entries here is not strictly necessary, but it 605 * does save having to do it later in a separate pass, and so saves a 606 * block's worth of cache misses. 607 */ 608 for (int i = 256; --i >= 0;) { 609 yy[i] = (char) i; 610 unzftab[i] = 0; 611 } 612 613 int groupNo = 0; 614 int groupPos = G_SIZE - 1; 615 final int eob = this.nInUse + 1; 616 int nextSym = getAndMoveToFrontDecode0(); 617 int lastShadow = -1; 618 int zt = selector[groupNo] & 0xff; 619 checkBounds(zt, N_GROUPS, "zt"); 620 int[] base_zt = base[zt]; 621 int[] limit_zt = limit[zt]; 622 int[] perm_zt = perm[zt]; 623 int minLens_zt = minLens[zt]; 624 625 while (nextSym != eob) { 626 if ((nextSym == RUNA) || (nextSym == RUNB)) { 627 int s = -1; 628 629 for (int n = 1; true; n <<= 1) { 630 if (nextSym == RUNA) { 631 s += n; 632 } else if (nextSym == RUNB) { 633 s += n << 1; 634 } else { 635 break; 636 } 637 638 if (groupPos == 0) { 639 groupPos = G_SIZE - 1; 640 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 641 zt = selector[groupNo] & 0xff; 642 checkBounds(zt, N_GROUPS, "zt"); 643 base_zt = base[zt]; 644 limit_zt = limit[zt]; 645 perm_zt = perm[zt]; 646 minLens_zt = minLens[zt]; 647 } else { 648 groupPos--; 649 } 650 651 int zn = minLens_zt; 652 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 653 int zvec = bsR(bin, zn); 654 while(zvec > limit_zt[zn]) { 655 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 656 zvec = (zvec << 1) | bsR(bin, 1); 657 } 658 final int tmp = zvec - base_zt[zn]; 659 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 660 nextSym = perm_zt[tmp]; 661 } 662 checkBounds(s, this.data.ll8.length, "s"); 663 664 final int yy0 = yy[0]; 665 checkBounds(yy0, 256, "yy"); 666 final byte ch = seqToUnseq[yy0]; 667 unzftab[ch & 0xff] += s + 1; 668 669 final int from = ++lastShadow; 670 lastShadow += s; 671 checkBounds(lastShadow, this.data.ll8.length, "lastShadow"); 672 Arrays.fill(ll8, from, lastShadow + 1, ch); 673 674 if (lastShadow >= limitLast) { 675 throw new IOException("Block overrun while expanding RLE in MTF, " 676 + lastShadow + " exceeds " + limitLast); 677 } 678 } else { 679 if (++lastShadow >= limitLast) { 680 throw new IOException("Block overrun in MTF, " 681 + lastShadow + " exceeds " + limitLast); 682 } 683 checkBounds(nextSym, 256 + 1, "nextSym"); 684 685 final char tmp = yy[nextSym - 1]; 686 checkBounds(tmp, 256, "yy"); 687 unzftab[seqToUnseq[tmp] & 0xff]++; 688 ll8[lastShadow] = seqToUnseq[tmp]; 689 690 /* 691 * This loop is hammered during decompression, hence avoid 692 * native method call overhead of System.arraycopy for very 693 * small ranges to copy. 694 */ 695 if (nextSym <= 16) { 696 for (int j = nextSym - 1; j > 0;) { 697 yy[j] = yy[--j]; 698 } 699 } else { 700 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 701 } 702 703 yy[0] = tmp; 704 705 if (groupPos == 0) { 706 groupPos = G_SIZE - 1; 707 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 708 zt = selector[groupNo] & 0xff; 709 checkBounds(zt, N_GROUPS, "zt"); 710 base_zt = base[zt]; 711 limit_zt = limit[zt]; 712 perm_zt = perm[zt]; 713 minLens_zt = minLens[zt]; 714 } else { 715 groupPos--; 716 } 717 718 int zn = minLens_zt; 719 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 720 int zvec = bsR(bin, zn); 721 while(zvec > limit_zt[zn]) { 722 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 723 zvec = (zvec << 1) | bsR(bin, 1); 724 } 725 final int idx = zvec - base_zt[zn]; 726 checkBounds(idx, MAX_ALPHA_SIZE, "zvec"); 727 nextSym = perm_zt[idx]; 728 } 729 } 730 731 this.last = lastShadow; 732 } 733 734 private int getAndMoveToFrontDecode0() throws IOException { 735 final Data dataShadow = this.data; 736 final int zt = dataShadow.selector[0] & 0xff; 737 checkBounds(zt, N_GROUPS, "zt"); 738 final int[] limit_zt = dataShadow.limit[zt]; 739 int zn = dataShadow.minLens[zt]; 740 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 741 int zvec = bsR(bin, zn); 742 while (zvec > limit_zt[zn]) { 743 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 744 zvec = (zvec << 1) | bsR(bin, 1); 745 } 746 final int tmp = zvec - dataShadow.base[zt][zn]; 747 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 748 749 return dataShadow.perm[zt][tmp]; 750 } 751 752 private int setupBlock() throws IOException { 753 if (currentState == EOF || this.data == null) { 754 return -1; 755 } 756 757 final int[] cftab = this.data.cftab; 758 final int ttLen = this.last + 1; 759 final int[] tt = this.data.initTT(ttLen); 760 final byte[] ll8 = this.data.ll8; 761 cftab[0] = 0; 762 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 763 764 for (int i = 1, c = cftab[0]; i <= 256; i++) { 765 c += cftab[i]; 766 cftab[i] = c; 767 } 768 769 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 770 final int tmp = cftab[ll8[i] & 0xff]++; 771 checkBounds(tmp, ttLen, "tt index"); 772 tt[tmp] = i; 773 } 774 775 if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { 776 throw new IOException("Stream corrupted"); 777 } 778 779 this.su_tPos = tt[this.origPtr]; 780 this.su_count = 0; 781 this.su_i2 = 0; 782 this.su_ch2 = 256; /* not a char and not EOF */ 783 784 if (this.blockRandomised) { 785 this.su_rNToGo = 0; 786 this.su_rTPos = 0; 787 return setupRandPartA(); 788 } 789 return setupNoRandPartA(); 790 } 791 792 private int setupRandPartA() throws IOException { 793 if (this.su_i2 <= this.last) { 794 this.su_chPrev = this.su_ch2; 795 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 796 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 797 this.su_tPos = this.data.tt[this.su_tPos]; 798 if (this.su_rNToGo == 0) { 799 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 800 if (++this.su_rTPos == 512) { 801 this.su_rTPos = 0; 802 } 803 } else { 804 this.su_rNToGo--; 805 } 806 this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; 807 this.su_i2++; 808 this.currentState = RAND_PART_B_STATE; 809 this.crc.updateCRC(su_ch2Shadow); 810 return su_ch2Shadow; 811 } 812 endBlock(); 813 initBlock(); 814 return setupBlock(); 815 } 816 817 private int setupNoRandPartA() throws IOException { 818 if (this.su_i2 <= this.last) { 819 this.su_chPrev = this.su_ch2; 820 final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 821 this.su_ch2 = su_ch2Shadow; 822 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 823 this.su_tPos = this.data.tt[this.su_tPos]; 824 this.su_i2++; 825 this.currentState = NO_RAND_PART_B_STATE; 826 this.crc.updateCRC(su_ch2Shadow); 827 return su_ch2Shadow; 828 } 829 this.currentState = NO_RAND_PART_A_STATE; 830 endBlock(); 831 initBlock(); 832 return setupBlock(); 833 } 834 835 private int setupRandPartB() throws IOException { 836 if (this.su_ch2 != this.su_chPrev) { 837 this.currentState = RAND_PART_A_STATE; 838 this.su_count = 1; 839 return setupRandPartA(); 840 } 841 if (++this.su_count < 4) { 842 this.currentState = RAND_PART_A_STATE; 843 return setupRandPartA(); 844 } 845 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 846 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 847 this.su_tPos = this.data.tt[this.su_tPos]; 848 if (this.su_rNToGo == 0) { 849 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 850 if (++this.su_rTPos == 512) { 851 this.su_rTPos = 0; 852 } 853 } else { 854 this.su_rNToGo--; 855 } 856 this.su_j2 = 0; 857 this.currentState = RAND_PART_C_STATE; 858 if (this.su_rNToGo == 1) { 859 this.su_z ^= 1; 860 } 861 return setupRandPartC(); 862 } 863 864 private int setupRandPartC() throws IOException { 865 if (this.su_j2 < this.su_z) { 866 this.crc.updateCRC(this.su_ch2); 867 this.su_j2++; 868 return this.su_ch2; 869 } 870 this.currentState = RAND_PART_A_STATE; 871 this.su_i2++; 872 this.su_count = 0; 873 return setupRandPartA(); 874 } 875 876 private int setupNoRandPartB() throws IOException { 877 if (this.su_ch2 != this.su_chPrev) { 878 this.su_count = 1; 879 return setupNoRandPartA(); 880 } 881 if (++this.su_count >= 4) { 882 checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos"); 883 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 884 this.su_tPos = this.data.tt[this.su_tPos]; 885 this.su_j2 = 0; 886 return setupNoRandPartC(); 887 } 888 return setupNoRandPartA(); 889 } 890 891 private int setupNoRandPartC() throws IOException { 892 if (this.su_j2 < this.su_z) { 893 final int su_ch2Shadow = this.su_ch2; 894 this.crc.updateCRC(su_ch2Shadow); 895 this.su_j2++; 896 this.currentState = NO_RAND_PART_C_STATE; 897 return su_ch2Shadow; 898 } 899 this.su_i2++; 900 this.su_count = 0; 901 return setupNoRandPartA(); 902 } 903 904 private static final class Data { 905 906 // (with blockSize 900k) 907 final boolean[] inUse = new boolean[256]; // 256 byte 908 909 final byte[] seqToUnseq = new byte[256]; // 256 byte 910 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 911 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 912 913 /** 914 * Freq table collected to save a pass over the data during 915 * decompression. 916 */ 917 final int[] unzftab = new int[256]; // 1024 byte 918 919 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 920 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 921 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 922 final int[] minLens = new int[N_GROUPS]; // 24 byte 923 924 final int[] cftab = new int[257]; // 1028 byte 925 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 926 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 927 // byte 928 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 929 // --------------- 930 // 60798 byte 931 932 int[] tt; // 3600000 byte 933 final byte[] ll8; // 900000 byte 934 935 // --------------- 936 // 4560782 byte 937 // =============== 938 939 Data(final int blockSize100k) { 940 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; 941 } 942 943 /** 944 * Initializes the {@link #tt} array. 945 * 946 * This method is called when the required length of the array is known. 947 * I don't initialize it at construction time to avoid unnecessary 948 * memory allocation when compressing small files. 949 */ 950 int[] initTT(final int length) { 951 int[] ttShadow = this.tt; 952 953 // tt.length should always be >= length, but theoretically 954 // it can happen, if the compressor mixed small and large 955 // blocks. Normally only the last block will be smaller 956 // than others. 957 if ((ttShadow == null) || (ttShadow.length < length)) { 958 this.tt = ttShadow = new int[length]; 959 } 960 961 return ttShadow; 962 } 963 964 } 965 966 /** 967 * Checks if the signature matches what is expected for a bzip2 file. 968 * 969 * @param signature 970 * the bytes to check 971 * @param length 972 * the number of bytes to check 973 * @return true, if this stream is a bzip2 compressed stream, false otherwise 974 * 975 * @since 1.1 976 */ 977 public static boolean matches(final byte[] signature, final int length) { 978 return length >= 3 && signature[0] == 'B' && 979 signature[1] == 'Z' && signature[2] == 'h'; 980 } 981}