001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
025import org.apache.commons.compress.utils.ByteUtils;
026
027/**
028 * CompressorInputStream for the LZ4 block format.
029 *
030 * @see <a href="https://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
031 * @since 1.14
032 * @NotThreadSafe
033 */
034public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {
035
036    private enum State {
037        NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
038    }
039
040    static final int WINDOW_SIZE = 1 << 16;
041    static final int SIZE_BITS = 4;
042    static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;
043
044    static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;
045
046    /** Back-Reference-size part of the block starting byte. */
047    private int nextBackReferenceSize;
048
049    /** Current state of the stream */
050    private State state = State.NO_BLOCK;
051
052    /**
053     * Creates a new LZ4 input stream.
054     *
055     * @param is An InputStream to read compressed data from
056     */
057    public BlockLZ4CompressorInputStream(final InputStream is) {
058        super(is, WINDOW_SIZE);
059    }
060
061    /**
062     * @return false if there is no more back-reference - this means this is the last block of the stream.
063     */
064    private boolean initializeBackReference() throws IOException {
065        int backReferenceOffset;
066        try {
067            backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
068        } catch (final IOException ex) {
069            if (nextBackReferenceSize == 0) { // the last block has no back-reference
070                return false;
071            }
072            throw ex;
073        }
074        long backReferenceSize = nextBackReferenceSize;
075        if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
076            backReferenceSize += readSizeBytes();
077        }
078        // minimal match length 4 is encoded as 0
079        if (backReferenceSize < 0) {
080            throw new IOException("Illegal block with a negative match length found");
081        }
082        try {
083            startBackReference(backReferenceOffset, backReferenceSize + 4);
084        } catch (final IllegalArgumentException ex) {
085            throw new IOException("Illegal block with bad offset found", ex);
086        }
087        state = State.IN_BACK_REFERENCE;
088        return true;
089    }
090
091    /**
092     * {@inheritDoc}
093     */
094    @Override
095    public int read(final byte[] b, final int off, final int len) throws IOException {
096        if (len == 0) {
097            return 0;
098        }
099        switch (state) {
100        case EOF:
101            return -1;
102        case NO_BLOCK: // NOSONAR - fallthrough intended
103            readSizes();
104            /* FALLTHROUGH */
105        case IN_LITERAL:
106            final int litLen = readLiteral(b, off, len);
107            if (!hasMoreDataInBlock()) {
108                state = State.LOOKING_FOR_BACK_REFERENCE;
109            }
110            return litLen > 0 ? litLen : read(b, off, len);
111        case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
112            if (!initializeBackReference()) {
113                state = State.EOF;
114                return -1;
115            }
116            /* FALLTHROUGH */
117        case IN_BACK_REFERENCE:
118            final int backReferenceLen = readBackReference(b, off, len);
119            if (!hasMoreDataInBlock()) {
120                state = State.NO_BLOCK;
121            }
122            return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
123        default:
124            throw new IOException("Unknown stream state " + state);
125        }
126    }
127
128    private long readSizeBytes() throws IOException {
129        long accum = 0;
130        int nextByte;
131        do {
132            nextByte = readOneByte();
133            if (nextByte == -1) {
134                throw new IOException("Premature end of stream while parsing length");
135            }
136            accum += nextByte;
137        } while (nextByte == 255);
138        return accum;
139    }
140
141    private void readSizes() throws IOException {
142        final int nextBlock = readOneByte();
143        if (nextBlock == -1) {
144            throw new IOException("Premature end of stream while looking for next block");
145        }
146        nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
147        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
148        if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
149            literalSizePart += readSizeBytes();
150        }
151        if (literalSizePart < 0) {
152            throw new IOException("Illegal block with a negative literal size found");
153        }
154        startLiteral(literalSizePart);
155        state = State.IN_LITERAL;
156    }
157}