BlockLZ4CompressorInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.compressors.lz4;

  20. import java.io.IOException;
  21. import java.io.InputStream;

  22. import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
  23. import org.apache.commons.compress.utils.ByteUtils;

  24. /**
  25.  * CompressorInputStream for the LZ4 block format.
  26.  *
  27.  * @see <a href="https://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
  28.  * @since 1.14
  29.  * @NotThreadSafe
  30.  */
  31. public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {

  32.     private enum State {
  33.         NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
  34.     }

  35.     static final int WINDOW_SIZE = 1 << 16;
  36.     static final int SIZE_BITS = 4;
  37.     static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;

  38.     static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;

  39.     /** Back-Reference-size part of the block starting byte. */
  40.     private int nextBackReferenceSize;

  41.     /** Current state of the stream */
  42.     private State state = State.NO_BLOCK;

  43.     /**
  44.      * Creates a new LZ4 input stream.
  45.      *
  46.      * @param is An InputStream to read compressed data from
  47.      */
  48.     public BlockLZ4CompressorInputStream(final InputStream is) {
  49.         super(is, WINDOW_SIZE);
  50.     }

  51.     /**
  52.      * @return false if there is no more back-reference - this means this is the last block of the stream.
  53.      */
  54.     private boolean initializeBackReference() throws IOException {
  55.         int backReferenceOffset;
  56.         try {
  57.             backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
  58.         } catch (final IOException ex) {
  59.             if (nextBackReferenceSize == 0) { // the last block has no back-reference
  60.                 return false;
  61.             }
  62.             throw ex;
  63.         }
  64.         long backReferenceSize = nextBackReferenceSize;
  65.         if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
  66.             backReferenceSize += readSizeBytes();
  67.         }
  68.         // minimal match length 4 is encoded as 0
  69.         if (backReferenceSize < 0) {
  70.             throw new IOException("Illegal block with a negative match length found");
  71.         }
  72.         try {
  73.             startBackReference(backReferenceOffset, backReferenceSize + 4);
  74.         } catch (final IllegalArgumentException ex) {
  75.             throw new IOException("Illegal block with bad offset found", ex);
  76.         }
  77.         state = State.IN_BACK_REFERENCE;
  78.         return true;
  79.     }

  80.     /**
  81.      * {@inheritDoc}
  82.      */
  83.     @Override
  84.     public int read(final byte[] b, final int off, final int len) throws IOException {
  85.         if (len == 0) {
  86.             return 0;
  87.         }
  88.         switch (state) {
  89.         case EOF:
  90.             return -1;
  91.         case NO_BLOCK: // NOSONAR - fallthrough intended
  92.             readSizes();
  93.             /* FALLTHROUGH */
  94.         case IN_LITERAL:
  95.             final int litLen = readLiteral(b, off, len);
  96.             if (!hasMoreDataInBlock()) {
  97.                 state = State.LOOKING_FOR_BACK_REFERENCE;
  98.             }
  99.             return litLen > 0 ? litLen : read(b, off, len);
  100.         case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
  101.             if (!initializeBackReference()) {
  102.                 state = State.EOF;
  103.                 return -1;
  104.             }
  105.             /* FALLTHROUGH */
  106.         case IN_BACK_REFERENCE:
  107.             final int backReferenceLen = readBackReference(b, off, len);
  108.             if (!hasMoreDataInBlock()) {
  109.                 state = State.NO_BLOCK;
  110.             }
  111.             return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
  112.         default:
  113.             throw new IOException("Unknown stream state " + state);
  114.         }
  115.     }

  116.     private long readSizeBytes() throws IOException {
  117.         long accum = 0;
  118.         int nextByte;
  119.         do {
  120.             nextByte = readOneByte();
  121.             if (nextByte == -1) {
  122.                 throw new IOException("Premature end of stream while parsing length");
  123.             }
  124.             accum += nextByte;
  125.         } while (nextByte == 255);
  126.         return accum;
  127.     }

  128.     private void readSizes() throws IOException {
  129.         final int nextBlock = readOneByte();
  130.         if (nextBlock == -1) {
  131.             throw new IOException("Premature end of stream while looking for next block");
  132.         }
  133.         nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
  134.         long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
  135.         if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
  136.             literalSizePart += readSizeBytes();
  137.         }
  138.         if (literalSizePart < 0) {
  139.             throw new IOException("Illegal block with a negative literal size found");
  140.         }
  141.         startLiteral(literalSizePart);
  142.         state = State.IN_LITERAL;
  143.     }
  144. }