/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BoundedInputStream;
027import org.apache.commons.compress.utils.ByteUtils;
028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream;
029import org.apache.commons.compress.utils.CountingInputStream;
030import org.apache.commons.compress.utils.IOUtils;
031import org.apache.commons.compress.utils.InputStreamStatistics;
032
033/**
034 * CompressorInputStream for the LZ4 frame format.
035 *
036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
037 *
038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
039 * @since 1.14
040 * @NotThreadSafe
041 */
042public class FramedLZ4CompressorInputStream extends CompressorInputStream
043    implements InputStreamStatistics {
044
045    // used by FramedLZ4CompressorOutputStream as well
046    static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR
047        4, 0x22, 0x4d, 0x18
048    };
049    private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] {
050        0x2a, 0x4d, 0x18
051    };
052    private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
053
054    static final int VERSION_MASK = 0xC0;
055    static final int SUPPORTED_VERSION = 0x40;
056    static final int BLOCK_INDEPENDENCE_MASK = 0x20;
057    static final int BLOCK_CHECKSUM_MASK = 0x10;
058    static final int CONTENT_SIZE_MASK = 0x08;
059    static final int CONTENT_CHECKSUM_MASK = 0x04;
060    static final int BLOCK_MAX_SIZE_MASK = 0x70;
061    static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
062
063    // used in no-arg read method
064    private final byte[] oneByte = new byte[1];
065
066    private final ByteUtils.ByteSupplier supplier = this::readOneByte;
067
068    private final CountingInputStream inputStream;
069    private final boolean decompressConcatenated;
070
071    private boolean expectBlockChecksum;
072    private boolean expectBlockDependency;
073    private boolean expectContentSize;
074    private boolean expectContentChecksum;
075
076    private InputStream currentBlock;
077    private boolean endReached, inUncompressed;
078
079    // used for frame header checksum and content checksum, if present
080    private final XXHash32 contentHash = new XXHash32();
081
082    // used for block checksum, if present
083    private final XXHash32 blockHash = new XXHash32();
084
085    // only created if the frame doesn't set the block independence flag
086    private byte[] blockDependencyBuffer;
087
088    /**
089     * Creates a new input stream that decompresses streams compressed
090     * using the LZ4 frame format and stops after decompressing the
091     * first frame.
092     * @param in  the InputStream from which to read the compressed data
093     * @throws IOException if reading fails
094     */
095    public FramedLZ4CompressorInputStream(final InputStream in) throws IOException {
096        this(in, false);
097    }
098
099    /**
100     * Creates a new input stream that decompresses streams compressed
101     * using the LZ4 frame format.
102     * @param in  the InputStream from which to read the compressed data
103     * @param decompressConcatenated if true, decompress until the end
104     *          of the input; if false, stop after the first LZ4 frame
105     *          and leave the input position to point to the next byte
106     *          after the frame stream
107     * @throws IOException if reading fails
108     */
109    public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
110        this.inputStream = new CountingInputStream(in);
111        this.decompressConcatenated = decompressConcatenated;
112        init(true);
113    }
114
115    /** {@inheritDoc} */
116    @Override
117    public int read() throws IOException {
118        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
119    }
120
121    /** {@inheritDoc} */
122    @Override
123    public void close() throws IOException {
124        try {
125            if (currentBlock != null) {
126                currentBlock.close();
127                currentBlock = null;
128            }
129        } finally {
130            inputStream.close();
131        }
132    }
133
134    /** {@inheritDoc} */
135    @Override
136    public int read(final byte[] b, final int off, final int len) throws IOException {
137        if (len == 0) {
138            return 0;
139        }
140        if (endReached) {
141            return -1;
142        }
143        int r = readOnce(b, off, len);
144        if (r == -1) {
145            nextBlock();
146            if (!endReached) {
147                r = readOnce(b, off, len);
148            }
149        }
150        if (r != -1) {
151            if (expectBlockDependency) {
152                appendToBlockDependencyBuffer(b, off, r);
153            }
154            if (expectContentChecksum) {
155                contentHash.update(b, off, r);
156            }
157        }
158        return r;
159    }
160
161    /**
162     * @since 1.17
163     */
164    @Override
165    public long getCompressedCount() {
166        return inputStream.getBytesRead();
167    }
168
169    private void init(final boolean firstFrame) throws IOException {
170        if (readSignature(firstFrame)) {
171            readFrameDescriptor();
172            nextBlock();
173        }
174    }
175
176    private boolean readSignature(final boolean firstFrame) throws IOException {
177        final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
178        final byte[] b = new byte[4];
179        int read = IOUtils.readFully(inputStream, b);
180        count(read);
181        if (0 == read && !firstFrame) {
182            // good LZ4 frame and nothing after it
183            endReached = true;
184            return false;
185        }
186        if (4 != read) {
187            throw new IOException(garbageMessage);
188        }
189
190        read = skipSkippableFrame(b);
191        if (0 == read && !firstFrame) {
192            // good LZ4 frame with only some skippable frames after it
193            endReached = true;
194            return false;
195        }
196        if (4 != read || !matches(b, 4)) {
197            throw new IOException(garbageMessage);
198        }
199        return true;
200    }
201
202    private void readFrameDescriptor() throws IOException {
203        final int flags = readOneByte();
204        if (flags == -1) {
205            throw new IOException("Premature end of stream while reading frame flags");
206        }
207        contentHash.update(flags);
208        if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
209            throw new IOException("Unsupported version " + (flags >> 6));
210        }
211        expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
212        if (expectBlockDependency) {
213            if (blockDependencyBuffer == null) {
214                blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
215            }
216        } else {
217            blockDependencyBuffer = null;
218        }
219        expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
220        expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
221        expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
222        final int bdByte = readOneByte();
223        if (bdByte == -1) { // max size is irrelevant for this implementation
224            throw new IOException("Premature end of stream while reading frame BD byte");
225        }
226        contentHash.update(bdByte);
227        if (expectContentSize) { // for now we don't care, contains the uncompressed size
228            final byte[] contentSize = new byte[8];
229            final int skipped = IOUtils.readFully(inputStream, contentSize);
230            count(skipped);
231            if (8 != skipped) {
232                throw new IOException("Premature end of stream while reading content size");
233            }
234            contentHash.update(contentSize, 0, contentSize.length);
235        }
236        final int headerHash = readOneByte();
237        if (headerHash == -1) { // partial hash of header.
238            throw new IOException("Premature end of stream while reading frame header checksum");
239        }
240        final int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff);
241        contentHash.reset();
242        if (headerHash != expectedHash) {
243            throw new IOException("Frame header checksum mismatch");
244        }
245    }
246
247    private void nextBlock() throws IOException {
248        maybeFinishCurrentBlock();
249        final long len = ByteUtils.fromLittleEndian(supplier, 4);
250        final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
251        final int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK));
252        if (realLen < 0) {
253            throw new IOException("Found illegal block with negative size");
254        }
255        if (realLen == 0) {
256            verifyContentChecksum();
257            if (!decompressConcatenated) {
258                endReached = true;
259            } else {
260                init(false);
261            }
262            return;
263        }
264        InputStream capped = new BoundedInputStream(inputStream, realLen);
265        if (expectBlockChecksum) {
266            capped = new ChecksumCalculatingInputStream(blockHash, capped);
267        }
268        if (uncompressed) {
269            inUncompressed = true;
270            currentBlock = capped;
271        } else {
272            inUncompressed = false;
273            final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
274            if (expectBlockDependency) {
275                s.prefill(blockDependencyBuffer);
276            }
277            currentBlock = s;
278        }
279    }
280
281    private void maybeFinishCurrentBlock() throws IOException {
282        if (currentBlock != null) {
283            currentBlock.close();
284            currentBlock = null;
285            if (expectBlockChecksum) {
286                verifyChecksum(blockHash, "block");
287                blockHash.reset();
288            }
289        }
290    }
291
292    private void verifyContentChecksum() throws IOException {
293        if (expectContentChecksum) {
294            verifyChecksum(contentHash, "content");
295        }
296        contentHash.reset();
297    }
298
299    private void verifyChecksum(final XXHash32 hash, final String kind) throws IOException {
300        final byte[] checksum = new byte[4];
301        final int read = IOUtils.readFully(inputStream, checksum);
302        count(read);
303        if (4 != read) {
304            throw new IOException("Premature end of stream while reading " + kind + " checksum");
305        }
306        final long expectedHash = hash.getValue();
307        if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
308            throw new IOException(kind + " checksum mismatch.");
309        }
310    }
311
312    private int readOneByte() throws IOException {
313        final int b = inputStream.read();
314        if (b != -1) {
315            count(1);
316            return b & 0xFF;
317        }
318        return -1;
319    }
320
321    private int readOnce(final byte[] b, final int off, final int len) throws IOException {
322        if (inUncompressed) {
323            final int cnt = currentBlock.read(b, off, len);
324            count(cnt);
325            return cnt;
326        }
327        final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
328        final long before = l.getBytesRead();
329        final int cnt = currentBlock.read(b, off, len);
330        count(l.getBytesRead() - before);
331        return cnt;
332    }
333
334    private static boolean isSkippableFrameSignature(final byte[] b) {
335        if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
336            return false;
337        }
338        for (int i = 1; i < 4; i++) {
339            if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
340                return false;
341            }
342        }
343        return true;
344    }
345
346    /**
347     * Skips over the contents of a skippable frame as well as
348     * skippable frames following it.
349     *
350     * <p>It then tries to read four more bytes which are supposed to
351     * hold an LZ4 signature and returns the number of bytes read
352     * while storing the bytes in the given array.</p>
353     */
354    private int skipSkippableFrame(final byte[] b) throws IOException {
355        int read = 4;
356        while (read == 4 && isSkippableFrameSignature(b)) {
357            final long len = ByteUtils.fromLittleEndian(supplier, 4);
358            if (len < 0) {
359                throw new IOException("Found illegal skippable frame with negative size");
360            }
361            final long skipped = IOUtils.skip(inputStream, len);
362            count(skipped);
363            if (len != skipped) {
364                throw new IOException("Premature end of stream while skipping frame");
365            }
366            read = IOUtils.readFully(inputStream, b);
367            count(read);
368        }
369        return read;
370    }
371
372    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
373        len = Math.min(len, blockDependencyBuffer.length);
374        if (len > 0) {
375            final int keep = blockDependencyBuffer.length - len;
376            if (keep > 0) {
377                // move last keep bytes towards the start of the buffer
378                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
379            }
380            // append new data
381            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
382        }
383    }
384
385    /**
386     * Checks if the signature matches what is expected for a .lz4 file.
387     *
388     * <p>.lz4 files start with a four byte signature.</p>
389     *
390     * @param signature the bytes to check
391     * @param length    the number of bytes to check
392     * @return          true if this is a .sz stream, false otherwise
393     */
394    public static boolean matches(final byte[] signature, final int length) {
395
396        if (length < LZ4_SIGNATURE.length) {
397            return false;
398        }
399
400        byte[] shortenedSig = signature;
401        if (signature.length > LZ4_SIGNATURE.length) {
402            shortenedSig = new byte[LZ4_SIGNATURE.length];
403            System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length);
404        }
405
406        return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
407    }
408}