001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.ByteOrder; 030import java.util.Arrays; 031 032import org.apache.commons.compress.compressors.CompressorInputStream; 033import org.apache.commons.compress.utils.BitInputStream; 034import org.apache.commons.compress.utils.InputStreamStatistics; 035import org.apache.commons.io.input.CloseShieldInputStream; 036 037/** 038 * An input stream that decompresses from the BZip2 format to be read as any other stream. 039 * 040 * @NotThreadSafe 041 */ 042public class BZip2CompressorInputStream extends CompressorInputStream implements BZip2Constants, InputStreamStatistics { 043 044 private static final class Data { 045 046 // (with blockSize 900k) 047 final boolean[] inUse = new boolean[256]; // 256 byte 048 049 final byte[] seqToUnseq = new byte[256]; // 256 byte 050 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 051 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 052 053 /** 054 * Freq table collected to save a pass over the data during decompression. 055 */ 056 final int[] unzftab = new int[256]; // 1024 byte 057 058 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 059 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 060 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 061 final int[] minLens = new int[N_GROUPS]; // 24 byte 062 063 final int[] cftab = new int[257]; // 1028 byte 064 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 065 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 066 // byte 067 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 068 // --------------- 069 // 60798 byte 070 071 int[] tt; // 3600000 byte 072 final byte[] ll8; // 900000 byte 073 074 // --------------- 075 // 4560782 byte 076 // =============== 077 078 Data(final int blockSize100k) { 079 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; 080 } 081 082 /** 083 * Initializes the {@link #tt} array. 084 * 085 * This method is called when the required length of the array is known. I don't initialize it at construction time to avoid unnecessary memory 086 * allocation when compressing small files. 087 */ 088 int[] initTT(final int length) { 089 int[] ttShadow = this.tt; 090 091 // tt.length should always be >= length, but theoretically 092 // it can happen, if the compressor mixed small and large 093 // blocks. Normally only the last block will be smaller 094 // than others. 095 if (ttShadow == null || ttShadow.length < length) { 096 this.tt = ttShadow = new int[length]; 097 } 098 099 return ttShadow; 100 } 101 102 } 103 104 private static final int EOF = 0; 105 106 private static final int START_BLOCK_STATE = 1; 107 108 private static final int RAND_PART_A_STATE = 2; 109 110 private static final int RAND_PART_B_STATE = 3; 111 112 private static final int RAND_PART_C_STATE = 4; 113 114 private static final int NO_RAND_PART_A_STATE = 5; 115 private static final int NO_RAND_PART_B_STATE = 6; 116 117 private static final int NO_RAND_PART_C_STATE = 7; 118 119 private static boolean bsGetBit(final BitInputStream bin) throws IOException { 120 return bsR(bin, 1) != 0; 121 } 122 123 private static int bsGetInt(final BitInputStream bin) throws IOException { 124 return bsR(bin, 32); 125 } 126 127 private static char bsGetUByte(final BitInputStream bin) throws IOException { 128 return (char) bsR(bin, 8); 129 } 130 131 /** 132 * read bits from the input stream 133 * 134 * @param n the number of bits to read, must not exceed 32? 135 * @return the requested bits combined into an int 136 * @throws IOException 137 */ 138 private static int bsR(final BitInputStream bin, final int n) throws IOException { 139 final long thech = bin.readBits(n); 140 if (thech < 0) { 141 throw new IOException("Unexpected end of stream"); 142 } 143 return (int) thech; 144 } 145 146 private static void checkBounds(final int checkVal, final int limitExclusive, final String name) throws IOException { 147 if (checkVal < 0) { 148 throw new IOException("Corrupted input, " + name + " value negative"); 149 } 150 if (checkVal >= limitExclusive) { 151 throw new IOException("Corrupted input, " + name + " value too big"); 152 } 153 } 154 155 /** 156 * Called by createHuffmanDecodingTables() exclusively. 157 */ 158 private static void hbCreateDecodeTables(final int[] limit, final int[] base, final int[] perm, final char[] length, final int minLen, final int maxLen, 159 final int alphaSize) throws IOException { 160 for (int i = minLen, pp = 0; i <= maxLen; i++) { 161 for (int j = 0; j < alphaSize; j++) { 162 if (length[j] == i) { 163 perm[pp++] = j; 164 } 165 } 166 } 167 168 for (int i = MAX_CODE_LEN; --i > 0;) { 169 base[i] = 0; 170 limit[i] = 0; 171 } 172 173 for (int i = 0; i < alphaSize; i++) { 174 final int l = length[i]; 175 checkBounds(l, MAX_ALPHA_SIZE, "length"); 176 base[l + 1]++; 177 } 178 179 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 180 b += base[i]; 181 base[i] = b; 182 } 183 184 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 185 final int nb = base[i + 1]; 186 vec += nb - b; 187 b = nb; 188 limit[i] = vec - 1; 189 vec <<= 1; 190 } 191 192 for (int i = minLen + 1; i <= maxLen; i++) { 193 base[i] = (limit[i - 1] + 1 << 1) - base[i]; 194 } 195 } 196 197 /** 198 * Checks if the signature matches what is expected for a bzip2 file. 199 * 200 * @param signature the bytes to check 201 * @param length the number of bytes to check 202 * @return true, if this stream is a bzip2 compressed stream, false otherwise 203 * 204 * @since 1.1 205 */ 206 public static boolean matches(final byte[] signature, final int length) { 207 return length >= 3 && signature[0] == 'B' && signature[1] == 'Z' && signature[2] == 'h'; 208 } 209 210 /** 211 * Index of the last char in the block, so the block size == last + 1. 212 */ 213 private int last; 214 215 /** 216 * Index in zptr[] of original string after sorting. 217 */ 218 private int origPtr; 219 /** 220 * always: in the range 0 .. 9. The current block size is 100000 * this number. 221 */ 222 private int blockSize100k; 223 224 // Variables used by setup* methods exclusively 225 226 private boolean blockRandomised; 227 private final CRC crc = new CRC(); 228 private int nInUse; 229 private BitInputStream bin; 230 private final boolean decompressConcatenated; 231 private int currentState = START_BLOCK_STATE; 232 private int storedBlockCRC, storedCombinedCRC; 233 private int computedCombinedCRC; 234 private int su_count; 235 236 private int su_ch2; 237 238 private int su_chPrev; 239 240 private int su_i2; 241 242 private int su_j2; 243 244 private int su_rNToGo; 245 246 private int su_rTPos; 247 248 private int su_tPos; 249 250 private char su_z; 251 252 /** 253 * All memory intensive stuff. This field is initialized by initBlock(). 254 */ 255 private BZip2CompressorInputStream.Data data; 256 257 /** 258 * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream. This doesn't support decompressing concatenated .bz2 259 * files. 260 * 261 * @param in the InputStream from which this object should be created 262 * @throws IOException if the stream content is malformed or an I/O error occurs. 263 * @throws NullPointerException if {@code in == null} 264 */ 265 public BZip2CompressorInputStream(final InputStream in) throws IOException { 266 this(in, false); 267 } 268 269 /** 270 * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream. 271 * 272 * @param in the InputStream from which this object should be created 273 * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first .bz2 stream and leave the input position to 274 * point to the next byte after the .bz2 stream 275 * 276 * @throws IOException if {@code in == null}, the stream content is malformed, or an I/O error occurs. 277 */ 278 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 279 this.bin = new BitInputStream(in == System.in ? CloseShieldInputStream.wrap(in) : in, ByteOrder.BIG_ENDIAN); 280 this.decompressConcatenated = decompressConcatenated; 281 282 init(true); 283 initBlock(); 284 } 285 286 @Override 287 public void close() throws IOException { 288 final BitInputStream inShadow = this.bin; 289 if (inShadow != null) { 290 try { 291 inShadow.close(); 292 } finally { 293 this.data = null; 294 this.bin = null; 295 } 296 } 297 } 298 299 private boolean complete() throws IOException { 300 this.storedCombinedCRC = bsGetInt(bin); 301 this.currentState = EOF; 302 this.data = null; 303 304 if (this.storedCombinedCRC != this.computedCombinedCRC) { 305 throw new IOException("BZip2 CRC error"); 306 } 307 308 // Look for the next .bz2 stream if decompressing 309 // concatenated files. 310 return !decompressConcatenated || !init(false); 311 } 312 313 /** 314 * Called by recvDecodingTables() exclusively. 315 */ 316 private void createHuffmanDecodingTables(final int alphaSize, final int nGroups) throws IOException { 317 final Data dataShadow = this.data; 318 final char[][] len = dataShadow.temp_charArray2d; 319 final int[] minLens = dataShadow.minLens; 320 final int[][] limit = dataShadow.limit; 321 final int[][] base = dataShadow.base; 322 final int[][] perm = dataShadow.perm; 323 324 for (int t = 0; t < nGroups; t++) { 325 int minLen = 32; 326 int maxLen = 0; 327 final char[] len_t = len[t]; 328 for (int i = alphaSize; --i >= 0;) { 329 final char lent = len_t[i]; 330 if (lent > maxLen) { 331 maxLen = lent; 332 } 333 if (lent < minLen) { 334 minLen = lent; 335 } 336 } 337 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize); 338 minLens[t] = minLen; 339 } 340 } 341 342 private void endBlock() throws IOException { 343 final int computedBlockCRC = this.crc.getValue(); 344 345 // A bad CRC is considered a fatal error. 346 if (this.storedBlockCRC != computedBlockCRC) { 347 // make next blocks readable without error 348 // (repair feature, not yet documented, not tested) 349 this.computedCombinedCRC = this.storedCombinedCRC << 1 | this.storedCombinedCRC >>> 31; 350 this.computedCombinedCRC ^= this.storedBlockCRC; 351 352 throw new IOException("BZip2 CRC error"); 353 } 354 355 this.computedCombinedCRC = this.computedCombinedCRC << 1 | this.computedCombinedCRC >>> 31; 356 this.computedCombinedCRC ^= computedBlockCRC; 357 } 358 359 private void getAndMoveToFrontDecode() throws IOException { 360 final BitInputStream bin = this.bin; 361 this.origPtr = bsR(bin, 24); 362 recvDecodingTables(); 363 364 final Data dataShadow = this.data; 365 final byte[] ll8 = dataShadow.ll8; 366 final int[] unzftab = dataShadow.unzftab; 367 final byte[] selector = dataShadow.selector; 368 final byte[] seqToUnseq = dataShadow.seqToUnseq; 369 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 370 final int[] minLens = dataShadow.minLens; 371 final int[][] limit = dataShadow.limit; 372 final int[][] base = dataShadow.base; 373 final int[][] perm = dataShadow.perm; 374 final int limitLast = this.blockSize100k * 100000; 375 376 /* 377 * Setting up the unzftab entries here is not strictly necessary, but it does save having to do it later in a separate pass, and so saves a block's 378 * worth of cache misses. 379 */ 380 for (int i = 256; --i >= 0;) { 381 yy[i] = (char) i; 382 unzftab[i] = 0; 383 } 384 385 int groupNo = 0; 386 int groupPos = G_SIZE - 1; 387 final int eob = this.nInUse + 1; 388 int nextSym = getAndMoveToFrontDecode0(); 389 int lastShadow = -1; 390 int zt = selector[groupNo] & 0xff; 391 checkBounds(zt, N_GROUPS, "zt"); 392 int[] base_zt = base[zt]; 393 int[] limit_zt = limit[zt]; 394 int[] perm_zt = perm[zt]; 395 int minLens_zt = minLens[zt]; 396 397 while (nextSym != eob) { 398 if (nextSym == RUNA || nextSym == RUNB) { 399 int s = -1; 400 401 for (int n = 1; true; n <<= 1) { 402 if (nextSym == RUNA) { 403 s += n; 404 } else if (nextSym == RUNB) { 405 s += n << 1; 406 } else { 407 break; 408 } 409 410 if (groupPos == 0) { 411 groupPos = G_SIZE - 1; 412 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 413 zt = selector[groupNo] & 0xff; 414 checkBounds(zt, N_GROUPS, "zt"); 415 base_zt = base[zt]; 416 limit_zt = limit[zt]; 417 perm_zt = perm[zt]; 418 minLens_zt = minLens[zt]; 419 } else { 420 groupPos--; 421 } 422 423 int zn = minLens_zt; 424 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 425 int zvec = bsR(bin, zn); 426 while (zvec > limit_zt[zn]) { 427 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 428 zvec = zvec << 1 | bsR(bin, 1); 429 } 430 final int tmp = zvec - base_zt[zn]; 431 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 432 nextSym = perm_zt[tmp]; 433 } 434 checkBounds(s, this.data.ll8.length, "s"); 435 436 final int yy0 = yy[0]; 437 checkBounds(yy0, 256, "yy"); 438 final byte ch = seqToUnseq[yy0]; 439 unzftab[ch & 0xff] += s + 1; 440 441 final int from = ++lastShadow; 442 lastShadow += s; 443 checkBounds(lastShadow, this.data.ll8.length, "lastShadow"); 444 Arrays.fill(ll8, from, lastShadow + 1, ch); 445 446 if (lastShadow >= limitLast) { 447 throw new IOException("Block overrun while expanding RLE in MTF, " + lastShadow + " exceeds " + limitLast); 448 } 449 } else { 450 if (++lastShadow >= limitLast) { 451 throw new IOException("Block overrun in MTF, " + lastShadow + " exceeds " + limitLast); 452 } 453 checkBounds(nextSym, 256 + 1, "nextSym"); 454 455 final char tmp = yy[nextSym - 1]; 456 checkBounds(tmp, 256, "yy"); 457 unzftab[seqToUnseq[tmp] & 0xff]++; 458 ll8[lastShadow] = seqToUnseq[tmp]; 459 460 /* 461 * This loop is hammered during decompression, hence avoid native method call overhead of System.arraycopy for very small ranges to copy. 462 */ 463 if (nextSym <= 16) { 464 for (int j = nextSym - 1; j > 0;) { 465 yy[j] = yy[--j]; 466 } 467 } else { 468 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 469 } 470 471 yy[0] = tmp; 472 473 if (groupPos == 0) { 474 groupPos = G_SIZE - 1; 475 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 476 zt = selector[groupNo] & 0xff; 477 checkBounds(zt, N_GROUPS, "zt"); 478 base_zt = base[zt]; 479 limit_zt = limit[zt]; 480 perm_zt = perm[zt]; 481 minLens_zt = minLens[zt]; 482 } else { 483 groupPos--; 484 } 485 486 int zn = minLens_zt; 487 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 488 int zvec = bsR(bin, zn); 489 while (zvec > limit_zt[zn]) { 490 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 491 zvec = zvec << 1 | bsR(bin, 1); 492 } 493 final int idx = zvec - base_zt[zn]; 494 checkBounds(idx, MAX_ALPHA_SIZE, "zvec"); 495 nextSym = perm_zt[idx]; 496 } 497 } 498 499 this.last = lastShadow; 500 } 501 502 private int getAndMoveToFrontDecode0() throws IOException { 503 final Data dataShadow = this.data; 504 final int zt = dataShadow.selector[0] & 0xff; 505 checkBounds(zt, N_GROUPS, "zt"); 506 final int[] limit_zt = dataShadow.limit[zt]; 507 int zn = dataShadow.minLens[zt]; 508 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 509 int zvec = bsR(bin, zn); 510 while (zvec > limit_zt[zn]) { 511 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 512 zvec = zvec << 1 | bsR(bin, 1); 513 } 514 final int tmp = zvec - dataShadow.base[zt][zn]; 515 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 516 517 return dataShadow.perm[zt][tmp]; 518 } 519 520 /** 521 * @since 1.17 522 */ 523 @Override 524 public long getCompressedCount() { 525 return bin.getBytesRead(); 526 } 527 528 private boolean init(final boolean isFirstStream) throws IOException { 529 if (null == bin) { 530 throw new IOException("No InputStream"); 531 } 532 533 if (!isFirstStream) { 534 bin.clearBitCache(); 535 } 536 537 final int magic0 = readNextByte(this.bin); 538 if (magic0 == -1 && !isFirstStream) { 539 return false; 540 } 541 final int magic1 = readNextByte(this.bin); 542 final int magic2 = readNextByte(this.bin); 543 544 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 545 throw new IOException(isFirstStream ? "Stream is not in the BZip2 format" : "Garbage after a valid BZip2 stream"); 546 } 547 548 final int blockSize = readNextByte(this.bin); 549 if (blockSize < '1' || blockSize > '9') { 550 throw new IOException("BZip2 block size is invalid"); 551 } 552 553 this.blockSize100k = blockSize - '0'; 554 555 this.computedCombinedCRC = 0; 556 557 return true; 558 } 559 560 private void initBlock() throws IOException { 561 final BitInputStream bin = this.bin; 562 char magic0; 563 char magic1; 564 char magic2; 565 char magic3; 566 char magic4; 567 char magic5; 568 569 while (true) { 570 // Get the block magic bytes. 571 magic0 = bsGetUByte(bin); 572 magic1 = bsGetUByte(bin); 573 magic2 = bsGetUByte(bin); 574 magic3 = bsGetUByte(bin); 575 magic4 = bsGetUByte(bin); 576 magic5 = bsGetUByte(bin); 577 578 // If isn't end of stream magic, break out of the loop. 579 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 580 break; 581 } 582 583 // End of stream was reached. Check the combined CRC and 584 // advance to the next .bz2 stream if decoding concatenated 585 // streams. 586 if (complete()) { 587 return; 588 } 589 } 590 591 if (magic0 != 0x31 || // '1' 592 magic1 != 0x41 || // ')' 593 magic2 != 0x59 || // 'Y' 594 magic3 != 0x26 || // '&' 595 magic4 != 0x53 || // 'S' 596 magic5 != 0x59 // 'Y' 597 ) { 598 this.currentState = EOF; 599 throw new IOException("Bad block header"); 600 } 601 this.storedBlockCRC = bsGetInt(bin); 602 this.blockRandomised = bsR(bin, 1) == 1; 603 604 /* 605 * Allocate data here instead in constructor, so we do not allocate it if the input file is empty. 606 */ 607 if (this.data == null) { 608 this.data = new Data(this.blockSize100k); 609 } 610 611 // currBlockNo++; 612 getAndMoveToFrontDecode(); 613 614 this.crc.reset(); 615 this.currentState = START_BLOCK_STATE; 616 } 617 618 private void makeMaps() { 619 final boolean[] inUse = this.data.inUse; 620 final byte[] seqToUnseq = this.data.seqToUnseq; 621 622 int nInUseShadow = 0; 623 624 for (int i = 0; i < 256; i++) { 625 if (inUse[i]) { 626 seqToUnseq[nInUseShadow++] = (byte) i; 627 } 628 } 629 630 this.nInUse = nInUseShadow; 631 } 632 633 @Override 634 public int read() throws IOException { 635 if (this.bin != null) { 636 final int r = read0(); 637 count(r < 0 ? -1 : 1); 638 return r; 639 } 640 throw new IOException("Stream closed"); 641 } 642 643 /* 644 * (non-Javadoc) 645 * 646 * @see java.io.InputStream#read(byte[], int, int) 647 */ 648 @Override 649 public int read(final byte[] dest, final int offs, final int len) throws IOException { 650 if (offs < 0) { 651 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 652 } 653 if (len < 0) { 654 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 655 } 656 if (offs + len > dest.length) { 657 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > dest.length(" + dest.length + ")."); 658 } 659 if (this.bin == null) { 660 throw new IOException("Stream closed"); 661 } 662 if (len == 0) { 663 return 0; 664 } 665 666 final int hi = offs + len; 667 int destOffs = offs; 668 int b; 669 while (destOffs < hi && (b = read0()) >= 0) { 670 dest[destOffs++] = (byte) b; 671 count(1); 672 } 673 674 return destOffs == offs ? -1 : destOffs - offs; 675 } 676 677 private int read0() throws IOException { 678 switch (currentState) { 679 case EOF: 680 return -1; 681 682 case START_BLOCK_STATE: 683 return setupBlock(); 684 685 case RAND_PART_A_STATE: 686 throw new IllegalStateException(); 687 688 case RAND_PART_B_STATE: 689 return setupRandPartB(); 690 691 case RAND_PART_C_STATE: 692 return setupRandPartC(); 693 694 case NO_RAND_PART_A_STATE: 695 throw new IllegalStateException(); 696 697 case NO_RAND_PART_B_STATE: 698 return setupNoRandPartB(); 699 700 case NO_RAND_PART_C_STATE: 701 return setupNoRandPartC(); 702 703 default: 704 throw new IllegalStateException(); 705 } 706 } 707 708 private int readNextByte(final BitInputStream in) throws IOException { 709 final long b = in.readBits(8); 710 return (int) b; 711 } 712 713 private void recvDecodingTables() throws IOException { 714 final BitInputStream bin = this.bin; 715 final Data dataShadow = this.data; 716 final boolean[] inUse = dataShadow.inUse; 717 final byte[] pos = dataShadow.recvDecodingTables_pos; 718 final byte[] selector = dataShadow.selector; 719 final byte[] selectorMtf = dataShadow.selectorMtf; 720 721 int inUse16 = 0; 722 723 /* Receive the mapping table */ 724 for (int i = 0; i < 16; i++) { 725 if (bsGetBit(bin)) { 726 inUse16 |= 1 << i; 727 } 728 } 729 730 Arrays.fill(inUse, false); 731 for (int i = 0; i < 16; i++) { 732 if ((inUse16 & 1 << i) != 0) { 733 final int i16 = i << 4; 734 for (int j = 0; j < 16; j++) { 735 if (bsGetBit(bin)) { 736 inUse[i16 + j] = true; 737 } 738 } 739 } 740 } 741 742 makeMaps(); 743 final int alphaSize = this.nInUse + 2; 744 /* Now the selectors */ 745 final int nGroups = bsR(bin, 3); 746 final int selectors = bsR(bin, 15); 747 if (selectors < 0) { 748 throw new IOException("Corrupted input, nSelectors value negative"); 749 } 750 checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize"); 751 checkBounds(nGroups, N_GROUPS + 1, "nGroups"); 752 753 // Don't fail on nSelectors overflowing boundaries but discard the values in overflow 754 // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/ 755 // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html 756 757 for (int i = 0; i < selectors; i++) { 758 int j = 0; 759 while (bsGetBit(bin)) { 760 j++; 761 } 762 if (i < MAX_SELECTORS) { 763 selectorMtf[i] = (byte) j; 764 } 765 } 766 final int nSelectors = Math.min(selectors, MAX_SELECTORS); 767 768 /* Undo the MTF values for the selectors. */ 769 for (int v = nGroups; --v >= 0;) { 770 pos[v] = (byte) v; 771 } 772 773 for (int i = 0; i < nSelectors; i++) { 774 int v = selectorMtf[i] & 0xff; 775 checkBounds(v, N_GROUPS, "selectorMtf"); 776 final byte tmp = pos[v]; 777 while (v > 0) { 778 // nearly all times v is zero, 4 in most other cases 779 pos[v] = pos[v - 1]; 780 v--; 781 } 782 pos[0] = tmp; 783 selector[i] = tmp; 784 } 785 786 final char[][] len = dataShadow.temp_charArray2d; 787 788 /* Now the coding tables */ 789 for (int t = 0; t < nGroups; t++) { 790 int curr = bsR(bin, 5); 791 final char[] len_t = len[t]; 792 for (int i = 0; i < alphaSize; i++) { 793 while (bsGetBit(bin)) { 794 curr += bsGetBit(bin) ? -1 : 1; 795 } 796 len_t[i] = (char) curr; 797 } 798 } 799 800 // finally create the Huffman tables 801 createHuffmanDecodingTables(alphaSize, nGroups); 802 } 803 804 private int setupBlock() throws IOException { 805 if (currentState == EOF || this.data == null) { 806 return -1; 807 } 808 809 final int[] cftab = this.data.cftab; 810 final int ttLen = this.last + 1; 811 final int[] tt = this.data.initTT(ttLen); 812 final byte[] ll8 = this.data.ll8; 813 cftab[0] = 0; 814 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 815 816 for (int i = 1, c = cftab[0]; i <= 256; i++) { 817 c += cftab[i]; 818 cftab[i] = c; 819 } 820 821 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 822 final int tmp = cftab[ll8[i] & 0xff]++; 823 checkBounds(tmp, ttLen, "tt index"); 824 tt[tmp] = i; 825 } 826 827 if (this.origPtr < 0 || this.origPtr >= tt.length) { 828 throw new IOException("Stream corrupted"); 829 } 830 831 this.su_tPos = tt[this.origPtr]; 832 this.su_count = 0; 833 this.su_i2 = 0; 834 this.su_ch2 = 256; /* not a char and not EOF */ 835 836 if (this.blockRandomised) { 837 this.su_rNToGo = 0; 838 this.su_rTPos = 0; 839 return setupRandPartA(); 840 } 841 return setupNoRandPartA(); 842 } 843 844 private int setupNoRandPartA() throws IOException { 845 if (this.su_i2 <= this.last) { 846 this.su_chPrev = this.su_ch2; 847 final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 848 this.su_ch2 = su_ch2Shadow; 849 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 850 this.su_tPos = this.data.tt[this.su_tPos]; 851 this.su_i2++; 852 this.currentState = NO_RAND_PART_B_STATE; 853 this.crc.update(su_ch2Shadow); 854 return su_ch2Shadow; 855 } 856 this.currentState = NO_RAND_PART_A_STATE; 857 endBlock(); 858 initBlock(); 859 return setupBlock(); 860 } 861 862 private int setupNoRandPartB() throws IOException { 863 if (this.su_ch2 != this.su_chPrev) { 864 this.su_count = 1; 865 return setupNoRandPartA(); 866 } 867 if (++this.su_count >= 4) { 868 checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos"); 869 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 870 this.su_tPos = this.data.tt[this.su_tPos]; 871 this.su_j2 = 0; 872 return setupNoRandPartC(); 873 } 874 return setupNoRandPartA(); 875 } 876 877 private int setupNoRandPartC() throws IOException { 878 if (this.su_j2 < this.su_z) { 879 final int su_ch2Shadow = this.su_ch2; 880 this.crc.update(su_ch2Shadow); 881 this.su_j2++; 882 this.currentState = NO_RAND_PART_C_STATE; 883 return su_ch2Shadow; 884 } 885 this.su_i2++; 886 this.su_count = 0; 887 return setupNoRandPartA(); 888 } 889 890 private int setupRandPartA() throws IOException { 891 if (this.su_i2 <= this.last) { 892 this.su_chPrev = this.su_ch2; 893 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 894 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 895 this.su_tPos = this.data.tt[this.su_tPos]; 896 if (this.su_rNToGo == 0) { 897 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 898 if (++this.su_rTPos == 512) { 899 this.su_rTPos = 0; 900 } 901 } else { 902 this.su_rNToGo--; 903 } 904 this.su_ch2 = su_ch2Shadow ^= this.su_rNToGo == 1 ? 1 : 0; 905 this.su_i2++; 906 this.currentState = RAND_PART_B_STATE; 907 this.crc.update(su_ch2Shadow); 908 return su_ch2Shadow; 909 } 910 endBlock(); 911 initBlock(); 912 return setupBlock(); 913 } 914 915 private int setupRandPartB() throws IOException { 916 if (this.su_ch2 != this.su_chPrev) { 917 this.currentState = RAND_PART_A_STATE; 918 this.su_count = 1; 919 return setupRandPartA(); 920 } 921 if (++this.su_count < 4) { 922 this.currentState = RAND_PART_A_STATE; 923 return setupRandPartA(); 924 } 925 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 926 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 927 this.su_tPos = this.data.tt[this.su_tPos]; 928 if (this.su_rNToGo == 0) { 929 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 930 if (++this.su_rTPos == 512) { 931 this.su_rTPos = 0; 932 } 933 } else { 934 this.su_rNToGo--; 935 } 936 this.su_j2 = 0; 937 this.currentState = RAND_PART_C_STATE; 938 if (this.su_rNToGo == 1) { 939 this.su_z ^= 1; 940 } 941 return setupRandPartC(); 942 } 943 944 private int setupRandPartC() throws IOException { 945 if (this.su_j2 < this.su_z) { 946 this.crc.update(this.su_ch2); 947 this.su_j2++; 948 return this.su_ch2; 949 } 950 this.currentState = RAND_PART_A_STATE; 951 this.su_i2++; 952 this.su_count = 0; 953 return setupRandPartA(); 954 } 955}