View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.util.Arrays;
24  import java.util.zip.CheckedInputStream;
25  
26  import org.apache.commons.compress.compressors.CompressorInputStream;
27  import org.apache.commons.compress.utils.ByteUtils;
28  import org.apache.commons.compress.utils.IOUtils;
29  import org.apache.commons.compress.utils.InputStreamStatistics;
30  import org.apache.commons.io.input.BoundedInputStream;
31  
32  /**
33   * CompressorInputStream for the LZ4 frame format.
34   *
35   * <p>
36   * Based on the "spec" in the version "1.5.1 (31/03/2015)"
37   * </p>
38   *
39   * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
40   * @since 1.14
41   * @NotThreadSafe
42   */
43  public class FramedLZ4CompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
44  
45      /** Used by FramedLZ4CompressorOutputStream as well. */
46      static final byte[] LZ4_SIGNATURE = { 4, 0x22, 0x4d, 0x18 };
47      private static final byte[] SKIPPABLE_FRAME_TRAILER = { 0x2a, 0x4d, 0x18 };
48      private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
49  
50      static final int VERSION_MASK = 0xC0;
51      static final int SUPPORTED_VERSION = 0x40;
52      static final int BLOCK_INDEPENDENCE_MASK = 0x20;
53      static final int BLOCK_CHECKSUM_MASK = 0x10;
54      static final int CONTENT_SIZE_MASK = 0x08;
55      static final int CONTENT_CHECKSUM_MASK = 0x04;
56      static final int BLOCK_MAX_SIZE_MASK = 0x70;
57      static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
58  
59      private static boolean isSkippableFrameSignature(final byte[] b) {
60          if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
61              return false;
62          }
63          for (int i = 1; i < 4; i++) {
64              if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
65                  return false;
66              }
67          }
68          return true;
69      }
70  
71      /**
72       * Checks if the signature matches what is expected for a .lz4 file.
73       * <p>
74       * .lz4 files start with a four byte signature.
75       * </p>
76       *
77       * @param signature the bytes to check
78       * @param length    the number of bytes to check
79       * @return true if this is a .sz stream, false otherwise
80       */
81      public static boolean matches(final byte[] signature, final int length) {
82  
83          if (length < LZ4_SIGNATURE.length) {
84              return false;
85          }
86  
87          byte[] shortenedSig = signature;
88          if (signature.length > LZ4_SIGNATURE.length) {
89              shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length);
90          }
91  
92          return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
93      }
94  
95      /** Used in no-arg read method. */
96      private final byte[] oneByte = new byte[1];
97      private final ByteUtils.ByteSupplier supplier = this::readOneByte;
98  
99      private final BoundedInputStream inputStream;
100     private final boolean decompressConcatenated;
101     private boolean expectBlockChecksum;
102     private boolean expectBlockDependency;
103 
104     private boolean expectContentChecksum;
105 
106     private InputStream currentBlock;
107 
108     private boolean endReached, inUncompressed;
109 
110     /** Used for frame header checksum and content checksum, if present. */
111     private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
112 
113     /** Used for block checksum, if present. */
114     private final org.apache.commons.codec.digest.XXHash32 blockHash = new org.apache.commons.codec.digest.XXHash32();
115 
116     /** Only created if the frame doesn't set the block independence flag. */
117     private byte[] blockDependencyBuffer;
118 
119     /**
120      * Creates a new input stream that decompresses streams compressed using the LZ4 frame format and stops after decompressing the first frame.
121      *
122      * @param in the InputStream from which to read the compressed data
123      * @throws IOException if reading fails
124      */
125     public FramedLZ4CompressorInputStream(final InputStream in) throws IOException {
126         this(in, false);
127     }
128 
129     /**
130      * Creates a new input stream that decompresses streams compressed using the LZ4 frame format.
131      *
132      * @param in                     the InputStream from which to read the compressed data
133      * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first LZ4 frame and leave the input position to
134      *                               point to the next byte after the frame stream
135      * @throws IOException if reading fails
136      */
137     public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
138         this.inputStream = BoundedInputStream.builder().setInputStream(in).get();
139         this.decompressConcatenated = decompressConcatenated;
140         init(true);
141     }
142 
143     private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
144         len = Math.min(len, blockDependencyBuffer.length);
145         if (len > 0) {
146             final int keep = blockDependencyBuffer.length - len;
147             if (keep > 0) {
148                 // move last keep bytes towards the start of the buffer
149                 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
150             }
151             // append new data
152             System.arraycopy(b, off, blockDependencyBuffer, keep, len);
153         }
154     }
155 
156     /** {@inheritDoc} */
157     @Override
158     public void close() throws IOException {
159         try {
160             if (currentBlock != null) {
161                 currentBlock.close();
162                 currentBlock = null;
163             }
164         } finally {
165             inputStream.close();
166         }
167     }
168 
169     /**
170      * @since 1.17
171      */
172     @Override
173     public long getCompressedCount() {
174         return inputStream.getCount();
175     }
176 
177     private void init(final boolean firstFrame) throws IOException {
178         if (readSignature(firstFrame)) {
179             readFrameDescriptor();
180             nextBlock();
181         }
182     }
183 
184     private void maybeFinishCurrentBlock() throws IOException {
185         if (currentBlock != null) {
186             currentBlock.close();
187             currentBlock = null;
188             if (expectBlockChecksum) {
189                 verifyChecksum(blockHash, "block");
190                 blockHash.reset();
191             }
192         }
193     }
194 
195     private void nextBlock() throws IOException {
196         maybeFinishCurrentBlock();
197         final long len = ByteUtils.fromLittleEndian(supplier, 4);
198         final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
199         final int realLen = (int) (len & ~UNCOMPRESSED_FLAG_MASK);
200         if (realLen == 0) {
201             verifyContentChecksum();
202             if (!decompressConcatenated) {
203                 endReached = true;
204             } else {
205                 init(false);
206             }
207             return;
208         }
209         // @formatter:off
210         InputStream capped = BoundedInputStream.builder()
211                 .setInputStream(inputStream)
212                 .setMaxCount(realLen)
213                 .setPropagateClose(false)
214                 .get();
215         // @formatter:on
216         if (expectBlockChecksum) {
217             capped = new CheckedInputStream(capped, blockHash);
218         }
219         if (uncompressed) {
220             inUncompressed = true;
221             currentBlock = capped;
222         } else {
223             inUncompressed = false;
224             final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
225             if (expectBlockDependency) {
226                 s.prefill(blockDependencyBuffer);
227             }
228             currentBlock = s;
229         }
230     }
231 
232     /** {@inheritDoc} */
233     @Override
234     public int read() throws IOException {
235         return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
236     }
237 
238     /** {@inheritDoc} */
239     @Override
240     public int read(final byte[] b, final int off, final int len) throws IOException {
241         if (len == 0) {
242             return 0;
243         }
244         if (endReached) {
245             return -1;
246         }
247         int r = readOnce(b, off, len);
248         if (r == -1) {
249             nextBlock();
250             if (!endReached) {
251                 r = readOnce(b, off, len);
252             }
253         }
254         if (r != -1) {
255             if (expectBlockDependency) {
256                 appendToBlockDependencyBuffer(b, off, r);
257             }
258             if (expectContentChecksum) {
259                 contentHash.update(b, off, r);
260             }
261         }
262         return r;
263     }
264 
265     private void readFrameDescriptor() throws IOException {
266         final int flags = readOneByte();
267         if (flags == -1) {
268             throw new IOException("Premature end of stream while reading frame flags");
269         }
270         contentHash.update(flags);
271         if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
272             throw new IOException("Unsupported version " + (flags >> 6));
273         }
274         expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
275         if (expectBlockDependency) {
276             if (blockDependencyBuffer == null) {
277                 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
278             }
279         } else {
280             blockDependencyBuffer = null;
281         }
282         expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
283         final boolean expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
284         expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
285         final int bdByte = readOneByte();
286         if (bdByte == -1) { // max size is irrelevant for this implementation
287             throw new IOException("Premature end of stream while reading frame BD byte");
288         }
289         contentHash.update(bdByte);
290         if (expectContentSize) { // for now, we don't care, contains the uncompressed size
291             final byte[] contentSize = new byte[8];
292             final int skipped = IOUtils.readFully(inputStream, contentSize);
293             count(skipped);
294             if (8 != skipped) {
295                 throw new IOException("Premature end of stream while reading content size");
296             }
297             contentHash.update(contentSize, 0, contentSize.length);
298         }
299         final int headerHash = readOneByte();
300         if (headerHash == -1) { // partial hash of header.
301             throw new IOException("Premature end of stream while reading frame header checksum");
302         }
303         final int expectedHash = (int) (contentHash.getValue() >> 8 & 0xff);
304         contentHash.reset();
305         if (headerHash != expectedHash) {
306             throw new IOException("Frame header checksum mismatch");
307         }
308     }
309 
310     private int readOnce(final byte[] b, final int off, final int len) throws IOException {
311         if (inUncompressed) {
312             final int cnt = currentBlock.read(b, off, len);
313             count(cnt);
314             return cnt;
315         }
316         final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
317         final long before = l.getBytesRead();
318         final int cnt = currentBlock.read(b, off, len);
319         count(l.getBytesRead() - before);
320         return cnt;
321     }
322 
323     private int readOneByte() throws IOException {
324         final int b = inputStream.read();
325         if (b != -1) {
326             count(1);
327             return b & 0xFF;
328         }
329         return -1;
330     }
331 
332     private boolean readSignature(final boolean firstFrame) throws IOException {
333         final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
334         final byte[] b = new byte[4];
335         int read = IOUtils.readFully(inputStream, b);
336         count(read);
337         if (0 == read && !firstFrame) {
338             // good LZ4 frame and nothing after it
339             endReached = true;
340             return false;
341         }
342         if (4 != read) {
343             throw new IOException(garbageMessage);
344         }
345 
346         read = skipSkippableFrame(b);
347         if (0 == read && !firstFrame) {
348             // good LZ4 frame with only some skippable frames after it
349             endReached = true;
350             return false;
351         }
352         if (4 != read || !matches(b, 4)) {
353             throw new IOException(garbageMessage);
354         }
355         return true;
356     }
357 
358     /**
359      * Skips over the contents of a skippable frame as well as skippable frames following it.
360      * <p>
361      * It then tries to read four more bytes which are supposed to hold an LZ4 signature and returns the number of bytes read while storing the bytes in the
362      * given array.
363      * </p>
364      */
365     private int skipSkippableFrame(final byte[] b) throws IOException {
366         int read = 4;
367         while (read == 4 && isSkippableFrameSignature(b)) {
368             final long len = ByteUtils.fromLittleEndian(supplier, 4);
369             if (len < 0) {
370                 throw new IOException("Found illegal skippable frame with negative size");
371             }
372             final long skipped = org.apache.commons.io.IOUtils.skip(inputStream, len);
373             count(skipped);
374             if (len != skipped) {
375                 throw new IOException("Premature end of stream while skipping frame");
376             }
377             read = IOUtils.readFully(inputStream, b);
378             count(read);
379         }
380         return read;
381     }
382 
383     private void verifyChecksum(final org.apache.commons.codec.digest.XXHash32 hash, final String kind) throws IOException {
384         final byte[] checksum = new byte[4];
385         final int read = IOUtils.readFully(inputStream, checksum);
386         count(read);
387         if (4 != read) {
388             throw new IOException("Premature end of stream while reading " + kind + " checksum");
389         }
390         final long expectedHash = hash.getValue();
391         if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
392             throw new IOException(kind + " checksum mismatch.");
393         }
394     }
395 
396     private void verifyContentChecksum() throws IOException {
397         if (expectContentChecksum) {
398             verifyChecksum(contentHash, "content");
399         }
400         contentHash.reset();
401     }
402 }