View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.util.Arrays;
24  import java.util.zip.CheckedInputStream;
25  
26  import org.apache.commons.compress.compressors.CompressorInputStream;
27  import org.apache.commons.compress.utils.BoundedInputStream;
28  import org.apache.commons.compress.utils.ByteUtils;
29  import org.apache.commons.compress.utils.IOUtils;
30  import org.apache.commons.compress.utils.InputStreamStatistics;
31  import org.apache.commons.io.input.CountingInputStream;
32  
33  /**
34   * CompressorInputStream for the LZ4 frame format.
35   *
36   * <p>
37   * Based on the "spec" in the version "1.5.1 (31/03/2015)"
38   * </p>
39   *
40   * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
41   * @since 1.14
42   * @NotThreadSafe
43   */
44  public class FramedLZ4CompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
45  
46      /** Used by FramedLZ4CompressorOutputStream as well. */
47      static final byte[] LZ4_SIGNATURE = { 4, 0x22, 0x4d, 0x18 };
48      private static final byte[] SKIPPABLE_FRAME_TRAILER = { 0x2a, 0x4d, 0x18 };
49      private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
50  
51      static final int VERSION_MASK = 0xC0;
52      static final int SUPPORTED_VERSION = 0x40;
53      static final int BLOCK_INDEPENDENCE_MASK = 0x20;
54      static final int BLOCK_CHECKSUM_MASK = 0x10;
55      static final int CONTENT_SIZE_MASK = 0x08;
56      static final int CONTENT_CHECKSUM_MASK = 0x04;
57      static final int BLOCK_MAX_SIZE_MASK = 0x70;
58      static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
59  
60      private static boolean isSkippableFrameSignature(final byte[] b) {
61          if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
62              return false;
63          }
64          for (int i = 1; i < 4; i++) {
65              if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
66                  return false;
67              }
68          }
69          return true;
70      }
71  
72      /**
73       * Checks if the signature matches what is expected for a .lz4 file.
74       * <p>
75       * .lz4 files start with a four byte signature.
76       * </p>
77       *
78       * @param signature the bytes to check
79       * @param length    the number of bytes to check
80       * @return true if this is a .sz stream, false otherwise
81       */
82      public static boolean matches(final byte[] signature, final int length) {
83  
84          if (length < LZ4_SIGNATURE.length) {
85              return false;
86          }
87  
88          byte[] shortenedSig = signature;
89          if (signature.length > LZ4_SIGNATURE.length) {
90              shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length);
91          }
92  
93          return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
94      }
95  
96      /** Used in no-arg read method. */
97      private final byte[] oneByte = new byte[1];
98      private final ByteUtils.ByteSupplier supplier = this::readOneByte;
99  
100     private final CountingInputStream inputStream;
101     private final boolean decompressConcatenated;
102     private boolean expectBlockChecksum;
103     private boolean expectBlockDependency;
104 
105     private boolean expectContentChecksum;
106 
107     private InputStream currentBlock;
108 
109     private boolean endReached, inUncompressed;
110 
111     /** Used for frame header checksum and content checksum, if present. */
112     private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
113 
114     /** Used for block checksum, if present. */
115     private final org.apache.commons.codec.digest.XXHash32 blockHash = new org.apache.commons.codec.digest.XXHash32();
116 
117     /** Only created if the frame doesn't set the block independence flag. */
118     private byte[] blockDependencyBuffer;
119 
120     /**
121      * Creates a new input stream that decompresses streams compressed using the LZ4 frame format and stops after decompressing the first frame.
122      *
123      * @param in the InputStream from which to read the compressed data
124      * @throws IOException if reading fails
125      */
126     public FramedLZ4CompressorInputStream(final InputStream in) throws IOException {
127         this(in, false);
128     }
129 
130     /**
131      * Creates a new input stream that decompresses streams compressed using the LZ4 frame format.
132      *
133      * @param in                     the InputStream from which to read the compressed data
134      * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first LZ4 frame and leave the input position to
135      *                               point to the next byte after the frame stream
136      * @throws IOException if reading fails
137      */
138     public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
139         this.inputStream = new CountingInputStream(in);
140         this.decompressConcatenated = decompressConcatenated;
141         init(true);
142     }
143 
144     private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
145         len = Math.min(len, blockDependencyBuffer.length);
146         if (len > 0) {
147             final int keep = blockDependencyBuffer.length - len;
148             if (keep > 0) {
149                 // move last keep bytes towards the start of the buffer
150                 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
151             }
152             // append new data
153             System.arraycopy(b, off, blockDependencyBuffer, keep, len);
154         }
155     }
156 
157     /** {@inheritDoc} */
158     @Override
159     public void close() throws IOException {
160         try {
161             if (currentBlock != null) {
162                 currentBlock.close();
163                 currentBlock = null;
164             }
165         } finally {
166             inputStream.close();
167         }
168     }
169 
170     /**
171      * @since 1.17
172      */
173     @Override
174     public long getCompressedCount() {
175         return inputStream.getByteCount();
176     }
177 
178     private void init(final boolean firstFrame) throws IOException {
179         if (readSignature(firstFrame)) {
180             readFrameDescriptor();
181             nextBlock();
182         }
183     }
184 
185     private void maybeFinishCurrentBlock() throws IOException {
186         if (currentBlock != null) {
187             currentBlock.close();
188             currentBlock = null;
189             if (expectBlockChecksum) {
190                 verifyChecksum(blockHash, "block");
191                 blockHash.reset();
192             }
193         }
194     }
195 
196     private void nextBlock() throws IOException {
197         maybeFinishCurrentBlock();
198         final long len = ByteUtils.fromLittleEndian(supplier, 4);
199         final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
200         final int realLen = (int) (len & ~UNCOMPRESSED_FLAG_MASK);
201         if (realLen == 0) {
202             verifyContentChecksum();
203             if (!decompressConcatenated) {
204                 endReached = true;
205             } else {
206                 init(false);
207             }
208             return;
209         }
210         InputStream capped = new BoundedInputStream(inputStream, realLen);
211         if (expectBlockChecksum) {
212             capped = new CheckedInputStream(capped, blockHash);
213         }
214         if (uncompressed) {
215             inUncompressed = true;
216             currentBlock = capped;
217         } else {
218             inUncompressed = false;
219             final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
220             if (expectBlockDependency) {
221                 s.prefill(blockDependencyBuffer);
222             }
223             currentBlock = s;
224         }
225     }
226 
227     /** {@inheritDoc} */
228     @Override
229     public int read() throws IOException {
230         return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
231     }
232 
233     /** {@inheritDoc} */
234     @Override
235     public int read(final byte[] b, final int off, final int len) throws IOException {
236         if (len == 0) {
237             return 0;
238         }
239         if (endReached) {
240             return -1;
241         }
242         int r = readOnce(b, off, len);
243         if (r == -1) {
244             nextBlock();
245             if (!endReached) {
246                 r = readOnce(b, off, len);
247             }
248         }
249         if (r != -1) {
250             if (expectBlockDependency) {
251                 appendToBlockDependencyBuffer(b, off, r);
252             }
253             if (expectContentChecksum) {
254                 contentHash.update(b, off, r);
255             }
256         }
257         return r;
258     }
259 
260     private void readFrameDescriptor() throws IOException {
261         final int flags = readOneByte();
262         if (flags == -1) {
263             throw new IOException("Premature end of stream while reading frame flags");
264         }
265         contentHash.update(flags);
266         if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
267             throw new IOException("Unsupported version " + (flags >> 6));
268         }
269         expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
270         if (expectBlockDependency) {
271             if (blockDependencyBuffer == null) {
272                 blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
273             }
274         } else {
275             blockDependencyBuffer = null;
276         }
277         expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
278         final boolean expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
279         expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
280         final int bdByte = readOneByte();
281         if (bdByte == -1) { // max size is irrelevant for this implementation
282             throw new IOException("Premature end of stream while reading frame BD byte");
283         }
284         contentHash.update(bdByte);
285         if (expectContentSize) { // for now, we don't care, contains the uncompressed size
286             final byte[] contentSize = new byte[8];
287             final int skipped = IOUtils.readFully(inputStream, contentSize);
288             count(skipped);
289             if (8 != skipped) {
290                 throw new IOException("Premature end of stream while reading content size");
291             }
292             contentHash.update(contentSize, 0, contentSize.length);
293         }
294         final int headerHash = readOneByte();
295         if (headerHash == -1) { // partial hash of header.
296             throw new IOException("Premature end of stream while reading frame header checksum");
297         }
298         final int expectedHash = (int) (contentHash.getValue() >> 8 & 0xff);
299         contentHash.reset();
300         if (headerHash != expectedHash) {
301             throw new IOException("Frame header checksum mismatch");
302         }
303     }
304 
305     private int readOnce(final byte[] b, final int off, final int len) throws IOException {
306         if (inUncompressed) {
307             final int cnt = currentBlock.read(b, off, len);
308             count(cnt);
309             return cnt;
310         }
311         final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
312         final long before = l.getBytesRead();
313         final int cnt = currentBlock.read(b, off, len);
314         count(l.getBytesRead() - before);
315         return cnt;
316     }
317 
318     private int readOneByte() throws IOException {
319         final int b = inputStream.read();
320         if (b != -1) {
321             count(1);
322             return b & 0xFF;
323         }
324         return -1;
325     }
326 
327     private boolean readSignature(final boolean firstFrame) throws IOException {
328         final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
329         final byte[] b = new byte[4];
330         int read = IOUtils.readFully(inputStream, b);
331         count(read);
332         if (0 == read && !firstFrame) {
333             // good LZ4 frame and nothing after it
334             endReached = true;
335             return false;
336         }
337         if (4 != read) {
338             throw new IOException(garbageMessage);
339         }
340 
341         read = skipSkippableFrame(b);
342         if (0 == read && !firstFrame) {
343             // good LZ4 frame with only some skippable frames after it
344             endReached = true;
345             return false;
346         }
347         if (4 != read || !matches(b, 4)) {
348             throw new IOException(garbageMessage);
349         }
350         return true;
351     }
352 
353     /**
354      * Skips over the contents of a skippable frame as well as skippable frames following it.
355      * <p>
356      * It then tries to read four more bytes which are supposed to hold an LZ4 signature and returns the number of bytes read while storing the bytes in the
357      * given array.
358      * </p>
359      */
360     private int skipSkippableFrame(final byte[] b) throws IOException {
361         int read = 4;
362         while (read == 4 && isSkippableFrameSignature(b)) {
363             final long len = ByteUtils.fromLittleEndian(supplier, 4);
364             if (len < 0) {
365                 throw new IOException("Found illegal skippable frame with negative size");
366             }
367             final long skipped = org.apache.commons.io.IOUtils.skip(inputStream, len);
368             count(skipped);
369             if (len != skipped) {
370                 throw new IOException("Premature end of stream while skipping frame");
371             }
372             read = IOUtils.readFully(inputStream, b);
373             count(read);
374         }
375         return read;
376     }
377 
378     private void verifyChecksum(final org.apache.commons.codec.digest.XXHash32 hash, final String kind) throws IOException {
379         final byte[] checksum = new byte[4];
380         final int read = IOUtils.readFully(inputStream, checksum);
381         count(read);
382         if (4 != read) {
383             throw new IOException("Premature end of stream while reading " + kind + " checksum");
384         }
385         final long expectedHash = hash.getValue();
386         if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
387             throw new IOException(kind + " checksum mismatch.");
388         }
389     }
390 
391     private void verifyContentChecksum() throws IOException {
392         if (expectContentChecksum) {
393             verifyChecksum(contentHash, "content");
394         }
395         contentHash.reset();
396     }
397 }