001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
025import org.apache.commons.compress.utils.ByteUtils;
026
027/**
028 * CompressorInputStream for the LZ4 block format.
029 *
030 * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
031 * @since 1.14
032 * @NotThreadSafe
033 */
034public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {
035
036    static final int WINDOW_SIZE = 1 << 16;
037    static final int SIZE_BITS = 4;
038    static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;
039    static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;
040
041    /** Back-Reference-size part of the block starting byte. */
042    private int nextBackReferenceSize;
043
044    /** Current state of the stream */
045    private State state = State.NO_BLOCK;
046
047    /**
048     * Creates a new LZ4 input stream.
049     *
050     * @param is
051     *            An InputStream to read compressed data from
052     *
053     * @throws IOException if reading fails
054     */
055    public BlockLZ4CompressorInputStream(final InputStream is) throws IOException {
056        super(is, WINDOW_SIZE);
057    }
058
059    /**
060     * {@inheritDoc}
061     */
062    @Override
063    public int read(final byte[] b, final int off, final int len) throws IOException {
064        if (len == 0) {
065            return 0;
066        }
067        switch (state) {
068        case EOF:
069            return -1;
070        case NO_BLOCK: // NOSONAR - fallthrough intended
071            readSizes();
072            /*FALLTHROUGH*/
073        case IN_LITERAL:
074            final int litLen = readLiteral(b, off, len);
075            if (!hasMoreDataInBlock()) {
076                state = State.LOOKING_FOR_BACK_REFERENCE;
077            }
078            return litLen > 0 ? litLen : read(b, off, len);
079        case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
080            if (!initializeBackReference()) {
081                state = State.EOF;
082                return -1;
083            }
084            /*FALLTHROUGH*/
085        case IN_BACK_REFERENCE:
086            final int backReferenceLen = readBackReference(b, off, len);
087            if (!hasMoreDataInBlock()) {
088                state = State.NO_BLOCK;
089            }
090            return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
091        default:
092            throw new IOException("Unknown stream state " + state);
093        }
094    }
095
096    private void readSizes() throws IOException {
097        final int nextBlock = readOneByte();
098        if (nextBlock == -1) {
099            throw new IOException("Premature end of stream while looking for next block");
100        }
101        nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
102        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
103        if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
104            literalSizePart += readSizeBytes();
105        }
106        if (literalSizePart < 0) {
107            throw new IOException("Illegal block with a negative literal size found");
108        }
109        startLiteral(literalSizePart);
110        state = State.IN_LITERAL;
111    }
112
113    private long readSizeBytes() throws IOException {
114        long accum = 0;
115        int nextByte;
116        do {
117            nextByte = readOneByte();
118            if (nextByte == -1) {
119                throw new IOException("Premature end of stream while parsing length");
120            }
121            accum += nextByte;
122        } while (nextByte == 255);
123        return accum;
124    }
125
126    /**
127     * @return false if there is no more back-reference - this means this is the
128     * last block of the stream.
129     */
130    private boolean initializeBackReference() throws IOException {
131        int backReferenceOffset = 0;
132        try {
133            backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
134        } catch (final IOException ex) {
135            if (nextBackReferenceSize == 0) { // the last block has no back-reference
136                return false;
137            }
138            throw ex;
139        }
140        long backReferenceSize = nextBackReferenceSize;
141        if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
142            backReferenceSize += readSizeBytes();
143        }
144        // minimal match length 4 is encoded as 0
145        if (backReferenceSize < 0) {
146            throw new IOException("Illegal block with a negative match length found");
147        }
148        try {
149            startBackReference(backReferenceOffset, backReferenceSize + 4);
150        } catch (final IllegalArgumentException ex) {
151            throw new IOException("Illegal block with bad offset found", ex);
152        }
153        state = State.IN_BACK_REFERENCE;
154        return true;
155    }
156
157    private enum State {
158        NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
159    }
160}