BZip2CompressorInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */

  19. /*
  20.  * This package is based on the work done by Keiron Liddle, Aftex Software
  21.  * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
  22.  * great code.
  23.  */
  24. package org.apache.commons.compress.compressors.bzip2;

  25. import java.io.IOException;
  26. import java.io.InputStream;
  27. import java.nio.ByteOrder;
  28. import java.util.Arrays;

  29. import org.apache.commons.compress.compressors.CompressorInputStream;
  30. import org.apache.commons.compress.utils.BitInputStream;
  31. import org.apache.commons.compress.utils.InputStreamStatistics;
  32. import org.apache.commons.io.input.CloseShieldInputStream;

  33. /**
  34.  * An input stream that decompresses from the BZip2 format to be read as any other stream.
  35.  *
  36.  * @NotThreadSafe
  37.  */
  38. public class BZip2CompressorInputStream extends CompressorInputStream implements BZip2Constants, InputStreamStatistics {

  39.     private static final class Data {

  40.         // (with blockSize 900k)
  41.         final boolean[] inUse = new boolean[256]; // 256 byte

  42.         final byte[] seqToUnseq = new byte[256]; // 256 byte
  43.         final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
  44.         final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte

  45.         /**
  46.          * Freq table collected to save a pass over the data during decompression.
  47.          */
  48.         final int[] unzftab = new int[256]; // 1024 byte

  49.         final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
  50.         final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
  51.         final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
  52.         final int[] minLens = new int[N_GROUPS]; // 24 byte

  53.         final int[] cftab = new int[257]; // 1028 byte
  54.         final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
  55.         final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
  56.         // byte
  57.         final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
  58.         // ---------------
  59.         // 60798 byte

  60.         int[] tt; // 3600000 byte
  61.         final byte[] ll8; // 900000 byte

  62.         // ---------------
  63.         // 4560782 byte
  64.         // ===============

  65.         Data(final int blockSize100k) {
  66.             this.ll8 = new byte[blockSize100k * BASEBLOCKSIZE];
  67.         }

  68.         /**
  69.          * Initializes the {@link #tt} array.
  70.          *
  71.          * This method is called when the required length of the array is known. I don't initialize it at construction time to avoid unnecessary memory
  72.          * allocation when compressing small files.
  73.          */
  74.         int[] initTT(final int length) {
  75.             int[] ttShadow = this.tt;

  76.             // tt.length should always be >= length, but theoretically
  77.             // it can happen, if the compressor mixed small and large
  78.             // blocks. Normally only the last block will be smaller
  79.             // than others.
  80.             if (ttShadow == null || ttShadow.length < length) {
  81.                 this.tt = ttShadow = new int[length];
  82.             }

  83.             return ttShadow;
  84.         }

  85.     }

  86.     private static final int EOF = 0;

  87.     private static final int START_BLOCK_STATE = 1;

  88.     private static final int RAND_PART_A_STATE = 2;

  89.     private static final int RAND_PART_B_STATE = 3;

  90.     private static final int RAND_PART_C_STATE = 4;

  91.     private static final int NO_RAND_PART_A_STATE = 5;
  92.     private static final int NO_RAND_PART_B_STATE = 6;

  93.     private static final int NO_RAND_PART_C_STATE = 7;

  94.     private static boolean bsGetBit(final BitInputStream bin) throws IOException {
  95.         return bsR(bin, 1) != 0;
  96.     }

  97.     private static int bsGetInt(final BitInputStream bin) throws IOException {
  98.         return bsR(bin, 32);
  99.     }

  100.     private static char bsGetUByte(final BitInputStream bin) throws IOException {
  101.         return (char) bsR(bin, 8);
  102.     }

  103.     /**
  104.      * read bits from the input stream
  105.      *
  106.      * @param n the number of bits to read, must not exceed 32?
  107.      * @return the requested bits combined into an int
  108.      * @throws IOException
  109.      */
  110.     private static int bsR(final BitInputStream bin, final int n) throws IOException {
  111.         final long thech = bin.readBits(n);
  112.         if (thech < 0) {
  113.             throw new IOException("Unexpected end of stream");
  114.         }
  115.         return (int) thech;
  116.     }

  117.     private static void checkBounds(final int checkVal, final int limitExclusive, final String name) throws IOException {
  118.         if (checkVal < 0) {
  119.             throw new IOException("Corrupted input, " + name + " value negative");
  120.         }
  121.         if (checkVal >= limitExclusive) {
  122.             throw new IOException("Corrupted input, " + name + " value too big");
  123.         }
  124.     }

  125.     /**
  126.      * Called by createHuffmanDecodingTables() exclusively.
  127.      */
  128.     private static void hbCreateDecodeTables(final int[] limit, final int[] base, final int[] perm, final char[] length, final int minLen, final int maxLen,
  129.             final int alphaSize) throws IOException {
  130.         for (int i = minLen, pp = 0; i <= maxLen; i++) {
  131.             for (int j = 0; j < alphaSize; j++) {
  132.                 if (length[j] == i) {
  133.                     perm[pp++] = j;
  134.                 }
  135.             }
  136.         }

  137.         for (int i = MAX_CODE_LEN; --i > 0;) {
  138.             base[i] = 0;
  139.             limit[i] = 0;
  140.         }

  141.         for (int i = 0; i < alphaSize; i++) {
  142.             final int l = length[i];
  143.             checkBounds(l, MAX_ALPHA_SIZE, "length");
  144.             base[l + 1]++;
  145.         }

  146.         for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
  147.             b += base[i];
  148.             base[i] = b;
  149.         }

  150.         for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
  151.             final int nb = base[i + 1];
  152.             vec += nb - b;
  153.             b = nb;
  154.             limit[i] = vec - 1;
  155.             vec <<= 1;
  156.         }

  157.         for (int i = minLen + 1; i <= maxLen; i++) {
  158.             base[i] = (limit[i - 1] + 1 << 1) - base[i];
  159.         }
  160.     }

  161.     /**
  162.      * Checks if the signature matches what is expected for a bzip2 file.
  163.      *
  164.      * @param signature the bytes to check
  165.      * @param length    the number of bytes to check
  166.      * @return true, if this stream is a bzip2 compressed stream, false otherwise
  167.      *
  168.      * @since 1.1
  169.      */
  170.     public static boolean matches(final byte[] signature, final int length) {
  171.         return length >= 3 && signature[0] == 'B' && signature[1] == 'Z' && signature[2] == 'h';
  172.     }

  173.     /**
  174.      * Index of the last char in the block, so the block size == last + 1.
  175.      */
  176.     private int last;

  177.     /**
  178.      * Index in zptr[] of original string after sorting.
  179.      */
  180.     private int origPtr;
  181.     /**
  182.      * always: in the range 0 .. 9. The current block size is 100000 * this number.
  183.      */
  184.     private int blockSize100k;

  185.     // Variables used by setup* methods exclusively

  186.     private boolean blockRandomised;
  187.     private final CRC crc = new CRC();
  188.     private int nInUse;
  189.     private BitInputStream bin;
  190.     private final boolean decompressConcatenated;
  191.     private int currentState = START_BLOCK_STATE;
  192.     private int storedBlockCRC, storedCombinedCRC;
  193.     private int computedCombinedCRC;
  194.     private int su_count;
  195.     private int su_ch2;
  196.     private int su_chPrev;
  197.     private int su_i2;
  198.     private int su_j2;
  199.     private int su_rNToGo;
  200.     private int su_rTPos;
  201.     private int su_tPos;
  202.     private char su_z;

  203.     /**
  204.      * All memory intensive stuff. This field is initialized by initBlock().
  205.      */
  206.     private BZip2CompressorInputStream.Data data;

  207.     /**
  208.      * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream. This doesn't support decompressing concatenated .bz2
  209.      * files.
  210.      *
  211.      * @param in the InputStream from which this object should be created
  212.      * @throws IOException          if the stream content is malformed or an I/O error occurs.
  213.      * @throws NullPointerException if {@code in == null}
  214.      */
  215.     public BZip2CompressorInputStream(final InputStream in) throws IOException {
  216.         this(in, false);
  217.     }

  218.     /**
  219.      * Constructs a new BZip2CompressorInputStream which decompresses bytes read from the specified stream.
  220.      *
  221.      * @param in                     the InputStream from which this object should be created
  222.      * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first .bz2 stream and leave the input position to
  223.      *                               point to the next byte after the .bz2 stream
  224.      *
  225.      * @throws IOException if {@code in == null}, the stream content is malformed, or an I/O error occurs.
  226.      */
  227.     public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
  228.         this.bin = new BitInputStream(in == System.in ? CloseShieldInputStream.wrap(in) : in, ByteOrder.BIG_ENDIAN);
  229.         this.decompressConcatenated = decompressConcatenated;
  230.         init(true);
  231.         initBlock();
  232.     }

  233.     @Override
  234.     public void close() throws IOException {
  235.         final BitInputStream inShadow = this.bin;
  236.         if (inShadow != null) {
  237.             try {
  238.                 inShadow.close();
  239.             } finally {
  240.                 this.data = null;
  241.                 this.bin = null;
  242.             }
  243.         }
  244.     }

  245.     private boolean complete() throws IOException {
  246.         this.storedCombinedCRC = bsGetInt(bin);
  247.         this.currentState = EOF;
  248.         this.data = null;
  249.         if (this.storedCombinedCRC != this.computedCombinedCRC) {
  250.             throw new IOException("BZip2 CRC error");
  251.         }
  252.         // Look for the next .bz2 stream if decompressing
  253.         // concatenated files.
  254.         return !decompressConcatenated || !init(false);
  255.     }

  256.     /**
  257.      * Called by recvDecodingTables() exclusively.
  258.      */
  259.     private void createHuffmanDecodingTables(final int alphaSize, final int nGroups) throws IOException {
  260.         final Data dataShadow = this.data;
  261.         final char[][] len = dataShadow.temp_charArray2d;
  262.         final int[] minLens = dataShadow.minLens;
  263.         final int[][] limit = dataShadow.limit;
  264.         final int[][] base = dataShadow.base;
  265.         final int[][] perm = dataShadow.perm;

  266.         for (int t = 0; t < nGroups; t++) {
  267.             int minLen = 32;
  268.             int maxLen = 0;
  269.             final char[] len_t = len[t];
  270.             for (int i = alphaSize; --i >= 0;) {
  271.                 final char lent = len_t[i];
  272.                 if (lent > maxLen) {
  273.                     maxLen = lent;
  274.                 }
  275.                 if (lent < minLen) {
  276.                     minLen = lent;
  277.                 }
  278.             }
  279.             hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
  280.             minLens[t] = minLen;
  281.         }
  282.     }

  283.     private void endBlock() throws IOException {
  284.         final int computedBlockCRC = this.crc.getValue();
  285.         // A bad CRC is considered a fatal error.
  286.         if (this.storedBlockCRC != computedBlockCRC) {
  287.             // make next blocks readable without error
  288.             // (repair feature, not yet documented, not tested)
  289.             this.computedCombinedCRC = this.storedCombinedCRC << 1 | this.storedCombinedCRC >>> 31;
  290.             this.computedCombinedCRC ^= this.storedBlockCRC;
  291.             throw new IOException("BZip2 CRC error");
  292.         }
  293.         this.computedCombinedCRC = this.computedCombinedCRC << 1 | this.computedCombinedCRC >>> 31;
  294.         this.computedCombinedCRC ^= computedBlockCRC;
  295.     }

  296.     private void getAndMoveToFrontDecode() throws IOException {
  297.         final BitInputStream bin = this.bin;
  298.         this.origPtr = bsR(bin, 24);
  299.         recvDecodingTables();
  300.         final Data dataShadow = this.data;
  301.         final byte[] ll8 = dataShadow.ll8;
  302.         final int[] unzftab = dataShadow.unzftab;
  303.         final byte[] selector = dataShadow.selector;
  304.         final byte[] seqToUnseq = dataShadow.seqToUnseq;
  305.         final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
  306.         final int[] minLens = dataShadow.minLens;
  307.         final int[][] limit = dataShadow.limit;
  308.         final int[][] base = dataShadow.base;
  309.         final int[][] perm = dataShadow.perm;
  310.         final int limitLast = this.blockSize100k * 100000;

  311.         /*
  312.          * Setting up the unzftab entries here is not strictly necessary, but it does save having to do it later in a separate pass, and so saves a block's
  313.          * worth of cache misses.
  314.          */
  315.         for (int i = 256; --i >= 0;) {
  316.             yy[i] = (char) i;
  317.             unzftab[i] = 0;
  318.         }

  319.         int groupNo = 0;
  320.         int groupPos = G_SIZE - 1;
  321.         final int eob = this.nInUse + 1;
  322.         int nextSym = getAndMoveToFrontDecode0();
  323.         int lastShadow = -1;
  324.         int zt = selector[groupNo] & 0xff;
  325.         checkBounds(zt, N_GROUPS, "zt");
  326.         int[] base_zt = base[zt];
  327.         int[] limit_zt = limit[zt];
  328.         int[] perm_zt = perm[zt];
  329.         int minLens_zt = minLens[zt];

  330.         while (nextSym != eob) {
  331.             if (nextSym == RUNA || nextSym == RUNB) {
  332.                 int s = -1;

  333.                 for (int n = 1; true; n <<= 1) {
  334.                     if (nextSym == RUNA) {
  335.                         s += n;
  336.                     } else if (nextSym == RUNB) {
  337.                         s += n << 1;
  338.                     } else {
  339.                         break;
  340.                     }

  341.                     if (groupPos == 0) {
  342.                         groupPos = G_SIZE - 1;
  343.                         checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
  344.                         zt = selector[groupNo] & 0xff;
  345.                         checkBounds(zt, N_GROUPS, "zt");
  346.                         base_zt = base[zt];
  347.                         limit_zt = limit[zt];
  348.                         perm_zt = perm[zt];
  349.                         minLens_zt = minLens[zt];
  350.                     } else {
  351.                         groupPos--;
  352.                     }

  353.                     int zn = minLens_zt;
  354.                     checkBounds(zn, MAX_ALPHA_SIZE, "zn");
  355.                     int zvec = bsR(bin, zn);
  356.                     while (zvec > limit_zt[zn]) {
  357.                         checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
  358.                         zvec = zvec << 1 | bsR(bin, 1);
  359.                     }
  360.                     final int tmp = zvec - base_zt[zn];
  361.                     checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");
  362.                     nextSym = perm_zt[tmp];
  363.                 }
  364.                 checkBounds(s, this.data.ll8.length, "s");

  365.                 final int yy0 = yy[0];
  366.                 checkBounds(yy0, 256, "yy");
  367.                 final byte ch = seqToUnseq[yy0];
  368.                 unzftab[ch & 0xff] += s + 1;

  369.                 final int from = ++lastShadow;
  370.                 lastShadow += s;
  371.                 checkBounds(lastShadow, this.data.ll8.length, "lastShadow");
  372.                 Arrays.fill(ll8, from, lastShadow + 1, ch);

  373.                 if (lastShadow >= limitLast) {
  374.                     throw new IOException("Block overrun while expanding RLE in MTF, " + lastShadow + " exceeds " + limitLast);
  375.                 }
  376.             } else {
  377.                 if (++lastShadow >= limitLast) {
  378.                     throw new IOException("Block overrun in MTF, " + lastShadow + " exceeds " + limitLast);
  379.                 }
  380.                 checkBounds(nextSym, 256 + 1, "nextSym");

  381.                 final char tmp = yy[nextSym - 1];
  382.                 checkBounds(tmp, 256, "yy");
  383.                 unzftab[seqToUnseq[tmp] & 0xff]++;
  384.                 ll8[lastShadow] = seqToUnseq[tmp];

  385.                 /*
  386.                  * This loop is hammered during decompression, hence avoid native method call overhead of System.arraycopy for very small ranges to copy.
  387.                  */
  388.                 if (nextSym <= 16) {
  389.                     for (int j = nextSym - 1; j > 0;) {
  390.                         yy[j] = yy[--j];
  391.                     }
  392.                 } else {
  393.                     System.arraycopy(yy, 0, yy, 1, nextSym - 1);
  394.                 }

  395.                 yy[0] = tmp;

  396.                 if (groupPos == 0) {
  397.                     groupPos = G_SIZE - 1;
  398.                     checkBounds(++groupNo, MAX_SELECTORS, "groupNo");
  399.                     zt = selector[groupNo] & 0xff;
  400.                     checkBounds(zt, N_GROUPS, "zt");
  401.                     base_zt = base[zt];
  402.                     limit_zt = limit[zt];
  403.                     perm_zt = perm[zt];
  404.                     minLens_zt = minLens[zt];
  405.                 } else {
  406.                     groupPos--;
  407.                 }

  408.                 int zn = minLens_zt;
  409.                 checkBounds(zn, MAX_ALPHA_SIZE, "zn");
  410.                 int zvec = bsR(bin, zn);
  411.                 while (zvec > limit_zt[zn]) {
  412.                     checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
  413.                     zvec = zvec << 1 | bsR(bin, 1);
  414.                 }
  415.                 final int idx = zvec - base_zt[zn];
  416.                 checkBounds(idx, MAX_ALPHA_SIZE, "zvec");
  417.                 nextSym = perm_zt[idx];
  418.             }
  419.         }

  420.         this.last = lastShadow;
  421.     }

  422.     private int getAndMoveToFrontDecode0() throws IOException {
  423.         final Data dataShadow = this.data;
  424.         final int zt = dataShadow.selector[0] & 0xff;
  425.         checkBounds(zt, N_GROUPS, "zt");
  426.         final int[] limit_zt = dataShadow.limit[zt];
  427.         int zn = dataShadow.minLens[zt];
  428.         checkBounds(zn, MAX_ALPHA_SIZE, "zn");
  429.         int zvec = bsR(bin, zn);
  430.         while (zvec > limit_zt[zn]) {
  431.             checkBounds(++zn, MAX_ALPHA_SIZE, "zn");
  432.             zvec = zvec << 1 | bsR(bin, 1);
  433.         }
  434.         final int tmp = zvec - dataShadow.base[zt][zn];
  435.         checkBounds(tmp, MAX_ALPHA_SIZE, "zvec");

  436.         return dataShadow.perm[zt][tmp];
  437.     }

  438.     /**
  439.      * @since 1.17
  440.      */
  441.     @Override
  442.     public long getCompressedCount() {
  443.         return bin.getBytesRead();
  444.     }

  445.     private boolean init(final boolean isFirstStream) throws IOException {
  446.         if (null == bin) {
  447.             throw new IOException("No InputStream");
  448.         }

  449.         if (!isFirstStream) {
  450.             bin.clearBitCache();
  451.         }

  452.         final int magic0 = readNextByte(this.bin);
  453.         if (magic0 == -1 && !isFirstStream) {
  454.             return false;
  455.         }
  456.         final int magic1 = readNextByte(this.bin);
  457.         final int magic2 = readNextByte(this.bin);

  458.         if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
  459.             throw new IOException(isFirstStream ? "Stream is not in the BZip2 format" : "Garbage after a valid BZip2 stream");
  460.         }

  461.         final int blockSize = readNextByte(this.bin);
  462.         if (blockSize < '1' || blockSize > '9') {
  463.             throw new IOException("BZip2 block size is invalid");
  464.         }

  465.         this.blockSize100k = blockSize - '0';

  466.         this.computedCombinedCRC = 0;

  467.         return true;
  468.     }

  469.     private void initBlock() throws IOException {
  470.         final BitInputStream bin = this.bin;
  471.         char magic0;
  472.         char magic1;
  473.         char magic2;
  474.         char magic3;
  475.         char magic4;
  476.         char magic5;

  477.         while (true) {
  478.             // Get the block magic bytes.
  479.             magic0 = bsGetUByte(bin);
  480.             magic1 = bsGetUByte(bin);
  481.             magic2 = bsGetUByte(bin);
  482.             magic3 = bsGetUByte(bin);
  483.             magic4 = bsGetUByte(bin);
  484.             magic5 = bsGetUByte(bin);

  485.             // If isn't end of stream magic, break out of the loop.
  486.             if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
  487.                 break;
  488.             }

  489.             // End of stream was reached. Check the combined CRC and
  490.             // advance to the next .bz2 stream if decoding concatenated
  491.             // streams.
  492.             if (complete()) {
  493.                 return;
  494.             }
  495.         }

  496.         if (magic0 != 0x31 || // '1'
  497.                 magic1 != 0x41 || // ')'
  498.                 magic2 != 0x59 || // 'Y'
  499.                 magic3 != 0x26 || // '&'
  500.                 magic4 != 0x53 || // 'S'
  501.                 magic5 != 0x59 // 'Y'
  502.         ) {
  503.             this.currentState = EOF;
  504.             throw new IOException("Bad block header");
  505.         }
  506.         this.storedBlockCRC = bsGetInt(bin);
  507.         this.blockRandomised = bsR(bin, 1) == 1;

  508.         /*
  509.          * Allocate data here instead in constructor, so we do not allocate it if the input file is empty.
  510.          */
  511.         if (this.data == null) {
  512.             this.data = new Data(this.blockSize100k);
  513.         }

  514.         // currBlockNo++;
  515.         getAndMoveToFrontDecode();

  516.         this.crc.reset();
  517.         this.currentState = START_BLOCK_STATE;
  518.     }

  519.     private void makeMaps() {
  520.         final boolean[] inUse = this.data.inUse;
  521.         final byte[] seqToUnseq = this.data.seqToUnseq;

  522.         int nInUseShadow = 0;

  523.         for (int i = 0; i < 256; i++) {
  524.             if (inUse[i]) {
  525.                 seqToUnseq[nInUseShadow++] = (byte) i;
  526.             }
  527.         }

  528.         this.nInUse = nInUseShadow;
  529.     }

  530.     @Override
  531.     public int read() throws IOException {
  532.         if (this.bin != null) {
  533.             final int r = read0();
  534.             count(r < 0 ? -1 : 1);
  535.             return r;
  536.         }
  537.         throw new IOException("Stream closed");
  538.     }

  539.     /*
  540.      * (non-Javadoc)
  541.      *
  542.      * @see java.io.InputStream#read(byte[], int, int)
  543.      */
  544.     @Override
  545.     public int read(final byte[] dest, final int offs, final int len) throws IOException {
  546.         if (offs < 0) {
  547.             throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
  548.         }
  549.         if (len < 0) {
  550.             throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
  551.         }
  552.         if (offs + len > dest.length) {
  553.             throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > dest.length(" + dest.length + ").");
  554.         }
  555.         if (this.bin == null) {
  556.             throw new IOException("Stream closed");
  557.         }
  558.         if (len == 0) {
  559.             return 0;
  560.         }

  561.         final int hi = offs + len;
  562.         int destOffs = offs;
  563.         int b;
  564.         while (destOffs < hi && (b = read0()) >= 0) {
  565.             dest[destOffs++] = (byte) b;
  566.             count(1);
  567.         }

  568.         return destOffs == offs ? -1 : destOffs - offs;
  569.     }

  570.     private int read0() throws IOException {
  571.         switch (currentState) {
  572.         case EOF:
  573.             return -1;

  574.         case START_BLOCK_STATE:
  575.             return setupBlock();

  576.         case RAND_PART_A_STATE:
  577.             throw new IllegalStateException();

  578.         case RAND_PART_B_STATE:
  579.             return setupRandPartB();

  580.         case RAND_PART_C_STATE:
  581.             return setupRandPartC();

  582.         case NO_RAND_PART_A_STATE:
  583.             throw new IllegalStateException();

  584.         case NO_RAND_PART_B_STATE:
  585.             return setupNoRandPartB();

  586.         case NO_RAND_PART_C_STATE:
  587.             return setupNoRandPartC();

  588.         default:
  589.             throw new IllegalStateException();
  590.         }
  591.     }

  592.     private int readNextByte(final BitInputStream in) throws IOException {
  593.         final long b = in.readBits(8);
  594.         return (int) b;
  595.     }

  596.     private void recvDecodingTables() throws IOException {
  597.         final BitInputStream bin = this.bin;
  598.         final Data dataShadow = this.data;
  599.         final boolean[] inUse = dataShadow.inUse;
  600.         final byte[] pos = dataShadow.recvDecodingTables_pos;
  601.         final byte[] selector = dataShadow.selector;
  602.         final byte[] selectorMtf = dataShadow.selectorMtf;

  603.         int inUse16 = 0;

  604.         /* Receive the mapping table */
  605.         for (int i = 0; i < 16; i++) {
  606.             if (bsGetBit(bin)) {
  607.                 inUse16 |= 1 << i;
  608.             }
  609.         }

  610.         Arrays.fill(inUse, false);
  611.         for (int i = 0; i < 16; i++) {
  612.             if ((inUse16 & 1 << i) != 0) {
  613.                 final int i16 = i << 4;
  614.                 for (int j = 0; j < 16; j++) {
  615.                     if (bsGetBit(bin)) {
  616.                         inUse[i16 + j] = true;
  617.                     }
  618.                 }
  619.             }
  620.         }

  621.         makeMaps();
  622.         final int alphaSize = this.nInUse + 2;
  623.         /* Now the selectors */
  624.         final int nGroups = bsR(bin, 3);
  625.         final int selectors = bsR(bin, 15);
  626.         if (selectors < 0) {
  627.             throw new IOException("Corrupted input, nSelectors value negative");
  628.         }
  629.         checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize");
  630.         checkBounds(nGroups, N_GROUPS + 1, "nGroups");

  631.         // Don't fail on nSelectors overflowing boundaries but discard the values in overflow
  632.         // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/
  633.         // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html

  634.         for (int i = 0; i < selectors; i++) {
  635.             int j = 0;
  636.             while (bsGetBit(bin)) {
  637.                 j++;
  638.             }
  639.             if (i < MAX_SELECTORS) {
  640.                 selectorMtf[i] = (byte) j;
  641.             }
  642.         }
  643.         final int nSelectors = Math.min(selectors, MAX_SELECTORS);

  644.         /* Undo the MTF values for the selectors. */
  645.         for (int v = nGroups; --v >= 0;) {
  646.             pos[v] = (byte) v;
  647.         }

  648.         for (int i = 0; i < nSelectors; i++) {
  649.             int v = selectorMtf[i] & 0xff;
  650.             checkBounds(v, N_GROUPS, "selectorMtf");
  651.             final byte tmp = pos[v];
  652.             while (v > 0) {
  653.                 // nearly all times v is zero, 4 in most other cases
  654.                 pos[v] = pos[v - 1];
  655.                 v--;
  656.             }
  657.             pos[0] = tmp;
  658.             selector[i] = tmp;
  659.         }

  660.         final char[][] len = dataShadow.temp_charArray2d;

  661.         /* Now the coding tables */
  662.         for (int t = 0; t < nGroups; t++) {
  663.             int curr = bsR(bin, 5);
  664.             final char[] len_t = len[t];
  665.             for (int i = 0; i < alphaSize; i++) {
  666.                 while (bsGetBit(bin)) {
  667.                     curr += bsGetBit(bin) ? -1 : 1;
  668.                 }
  669.                 len_t[i] = (char) curr;
  670.             }
  671.         }

  672.         // finally create the Huffman tables
  673.         createHuffmanDecodingTables(alphaSize, nGroups);
  674.     }

  675.     private int setupBlock() throws IOException {
  676.         if (currentState == EOF || this.data == null) {
  677.             return -1;
  678.         }

  679.         final int[] cftab = this.data.cftab;
  680.         final int ttLen = this.last + 1;
  681.         final int[] tt = this.data.initTT(ttLen);
  682.         final byte[] ll8 = this.data.ll8;
  683.         cftab[0] = 0;
  684.         System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);

  685.         for (int i = 1, c = cftab[0]; i <= 256; i++) {
  686.             c += cftab[i];
  687.             cftab[i] = c;
  688.         }

  689.         for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
  690.             final int tmp = cftab[ll8[i] & 0xff]++;
  691.             checkBounds(tmp, ttLen, "tt index");
  692.             tt[tmp] = i;
  693.         }

  694.         if (this.origPtr < 0 || this.origPtr >= tt.length) {
  695.             throw new IOException("Stream corrupted");
  696.         }

  697.         this.su_tPos = tt[this.origPtr];
  698.         this.su_count = 0;
  699.         this.su_i2 = 0;
  700.         this.su_ch2 = 256; /* not a char and not EOF */

  701.         if (this.blockRandomised) {
  702.             this.su_rNToGo = 0;
  703.             this.su_rTPos = 0;
  704.             return setupRandPartA();
  705.         }
  706.         return setupNoRandPartA();
  707.     }

  708.     private int setupNoRandPartA() throws IOException {
  709.         if (this.su_i2 <= this.last) {
  710.             this.su_chPrev = this.su_ch2;
  711.             final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
  712.             this.su_ch2 = su_ch2Shadow;
  713.             checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
  714.             this.su_tPos = this.data.tt[this.su_tPos];
  715.             this.su_i2++;
  716.             this.currentState = NO_RAND_PART_B_STATE;
  717.             this.crc.update(su_ch2Shadow);
  718.             return su_ch2Shadow;
  719.         }
  720.         this.currentState = NO_RAND_PART_A_STATE;
  721.         endBlock();
  722.         initBlock();
  723.         return setupBlock();
  724.     }

  725.     private int setupNoRandPartB() throws IOException {
  726.         if (this.su_ch2 != this.su_chPrev) {
  727.             this.su_count = 1;
  728.             return setupNoRandPartA();
  729.         }
  730.         if (++this.su_count >= 4) {
  731.             checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos");
  732.             this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
  733.             this.su_tPos = this.data.tt[this.su_tPos];
  734.             this.su_j2 = 0;
  735.             return setupNoRandPartC();
  736.         }
  737.         return setupNoRandPartA();
  738.     }

  739.     private int setupNoRandPartC() throws IOException {
  740.         if (this.su_j2 < this.su_z) {
  741.             final int su_ch2Shadow = this.su_ch2;
  742.             this.crc.update(su_ch2Shadow);
  743.             this.su_j2++;
  744.             this.currentState = NO_RAND_PART_C_STATE;
  745.             return su_ch2Shadow;
  746.         }
  747.         this.su_i2++;
  748.         this.su_count = 0;
  749.         return setupNoRandPartA();
  750.     }

  751.     private int setupRandPartA() throws IOException {
  752.         if (this.su_i2 <= this.last) {
  753.             this.su_chPrev = this.su_ch2;
  754.             int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
  755.             checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
  756.             this.su_tPos = this.data.tt[this.su_tPos];
  757.             if (this.su_rNToGo == 0) {
  758.                 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
  759.                 if (++this.su_rTPos == 512) {
  760.                     this.su_rTPos = 0;
  761.                 }
  762.             } else {
  763.                 this.su_rNToGo--;
  764.             }
  765.             this.su_ch2 = su_ch2Shadow ^= this.su_rNToGo == 1 ? 1 : 0;
  766.             this.su_i2++;
  767.             this.currentState = RAND_PART_B_STATE;
  768.             this.crc.update(su_ch2Shadow);
  769.             return su_ch2Shadow;
  770.         }
  771.         endBlock();
  772.         initBlock();
  773.         return setupBlock();
  774.     }

  775.     private int setupRandPartB() throws IOException {
  776.         if (this.su_ch2 != this.su_chPrev) {
  777.             this.currentState = RAND_PART_A_STATE;
  778.             this.su_count = 1;
  779.             return setupRandPartA();
  780.         }
  781.         if (++this.su_count < 4) {
  782.             this.currentState = RAND_PART_A_STATE;
  783.             return setupRandPartA();
  784.         }
  785.         this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
  786.         checkBounds(this.su_tPos, this.data.tt.length, "su_tPos");
  787.         this.su_tPos = this.data.tt[this.su_tPos];
  788.         if (this.su_rNToGo == 0) {
  789.             this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
  790.             if (++this.su_rTPos == 512) {
  791.                 this.su_rTPos = 0;
  792.             }
  793.         } else {
  794.             this.su_rNToGo--;
  795.         }
  796.         this.su_j2 = 0;
  797.         this.currentState = RAND_PART_C_STATE;
  798.         if (this.su_rNToGo == 1) {
  799.             this.su_z ^= 1;
  800.         }
  801.         return setupRandPartC();
  802.     }

  803.     private int setupRandPartC() throws IOException {
  804.         if (this.su_j2 < this.su_z) {
  805.             this.crc.update(this.su_ch2);
  806.             this.su_j2++;
  807.             return this.su_ch2;
  808.         }
  809.         this.currentState = RAND_PART_A_STATE;
  810.         this.su_i2++;
  811.         this.su_count = 0;
  812.         return setupRandPartA();
  813.     }
  814. }