View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import java.io.ByteArrayOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  
25  import org.apache.commons.compress.compressors.CompressorOutputStream;
26  import org.apache.commons.compress.utils.ByteUtils;
27  
28  /**
29   * CompressorOutputStream for the LZ4 frame format.
30   *
31   * <p>
32   * Based on the "spec" in the version "1.5.1 (31/03/2015)"
33   * </p>
34   *
35   * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
36   * @since 1.14
37   * @NotThreadSafe
38   */
39  public class FramedLZ4CompressorOutputStream extends CompressorOutputStream<OutputStream> {
40  
41      /**
42       * Enumerates the block sizes supported by the format.
43       */
44      public enum BlockSize {
45  
46          /** Block size of 64K. */
47          K64(64 * 1024, 4),
48  
49          /** Block size of 256K. */
50          K256(256 * 1024, 5),
51  
52          /** Block size of 1M. */
53          M1(1024 * 1024, 6),
54  
55          /** Block size of 4M. */
56          M4(4096 * 1024, 7);
57  
58          private final int size;
59          private final int index;
60  
61          BlockSize(final int size, final int index) {
62              this.size = size;
63              this.index = index;
64          }
65  
66          int getIndex() {
67              return index;
68          }
69  
70          int getSize() {
71              return size;
72          }
73      }
74  
75      /**
76       * Parameters of the LZ4 frame format.
77       */
78      public static class Parameters {
79  
80          /**
81           * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
82           *
83           * <p>
84           * This matches the defaults of the lz4 command line utility.
85           * </p>
86           */
87          public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
88          private final BlockSize blockSize;
89          private final boolean withContentChecksum;
90          private final boolean withBlockChecksum;
91          private final boolean withBlockDependency;
92  
93          private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
94  
95          /**
96           * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
97           * independent blocks.
98           *
99           * @param blockSize the size of a single block.
100          */
101         public Parameters(final BlockSize blockSize) {
102             this(blockSize, true, false, false);
103         }
104 
105         /**
106          * Sets up custom parameters for the LZ4 stream.
107          *
108          * @param blockSize           the size of a single block.
109          * @param withContentChecksum whether to write a content checksum
110          * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
111          * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
112          *                            impossible to decompress the output in parallel.
113          */
114         public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
115             this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
116         }
117 
118         /**
119          * Sets up custom parameters for the LZ4 stream.
120          *
121          * @param blockSize           the size of a single block.
122          * @param withContentChecksum whether to write a content checksum
123          * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
124          * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
125          *                            impossible to decompress the output in parallel.
126          * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
127          */
128         public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
129                 final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
130             this.blockSize = blockSize;
131             this.withContentChecksum = withContentChecksum;
132             this.withBlockChecksum = withBlockChecksum;
133             this.withBlockDependency = withBlockDependency;
134             this.lz77params = lz77params;
135         }
136 
137         /**
138          * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
139          * independent blocks.
140          *
141          * @param blockSize  the size of a single block.
142          * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
143          */
144         public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
145             this(blockSize, true, false, false, lz77params);
146         }
147 
148         @Override
149         public String toString() {
150             return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
151                     + ", withBlockDependency " + withBlockDependency;
152         }
153     }
154 
155     private static final byte[] END_MARK = new byte[4];
156     // used in one-arg write method
157     private final byte[] oneByte = new byte[1];
158     private final byte[] blockData;
159     private final Parameters params;
160 
161     // used for frame header checksum and content checksum, if requested
162     private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
163     // used for block checksum, if requested
164     private final org.apache.commons.codec.digest.XXHash32 blockHash;
165 
166     // only created if the config requires block dependency
167     private final byte[] blockDependencyBuffer;
168 
169     private int collectedBlockDependencyBytes;
170     private int currentIndex;
171 
172     /**
173      * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
174      *
175      * @param out the OutputStream to which to write the compressed data
176      * @throws IOException if writing the signature fails
177      */
178     public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
179         this(out, Parameters.DEFAULT);
180     }
181 
182     /**
183      * Constructs a new output stream that compresses data using the LZ4 frame format using the given block size.
184      *
185      * @param out    the OutputStream to which to write the compressed data
186      * @param params the parameters to use
187      * @throws IOException if writing the signature fails
188      */
189     public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
190         super(out);
191         this.params = params;
192         blockData = new byte[params.blockSize.getSize()];
193         blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
194         out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
195         writeFrameDescriptor();
196         blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
197     }
198 
199     private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
200         len = Math.min(len, blockDependencyBuffer.length);
201         if (len > 0) {
202             final int keep = blockDependencyBuffer.length - len;
203             if (keep > 0) {
204                 // move last keep bytes towards the start of the buffer
205                 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
206             }
207             // append new data
208             System.arraycopy(b, off, blockDependencyBuffer, keep, len);
209             collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, blockDependencyBuffer.length);
210         }
211     }
212 
213     @Override
214     public void close() throws IOException {
215         try {
216             finish();
217         } finally {
218             super.close();
219         }
220     }
221 
222     /**
223      * Compresses all blockDataRemaining data and writes it to the stream, doesn't close the underlying stream.
224      *
225      * @throws IOException if an error occurs
226      */
227     @Override
228     public void finish() throws IOException {
229         if (!isFinished()) {
230             flushBlock();
231             writeTrailer();
232             super.finish();
233         }
234     }
235 
236     private void flushBlock() throws IOException {
237         if (currentIndex == 0) {
238             return;
239         }
240         final boolean withBlockDependency = params.withBlockDependency;
241         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
242         try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
243             if (withBlockDependency) {
244                 o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
245             }
246             o.write(blockData, 0, currentIndex);
247         }
248         if (withBlockDependency) {
249             appendToBlockDependencyBuffer(blockData, 0, currentIndex);
250         }
251         final byte[] b = baos.toByteArray();
252         if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
253             ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
254             out.write(blockData, 0, currentIndex);
255             if (params.withBlockChecksum) {
256                 blockHash.update(blockData, 0, currentIndex);
257             }
258         } else {
259             ByteUtils.toLittleEndian(out, b.length, 4);
260             out.write(b);
261             if (params.withBlockChecksum) {
262                 blockHash.update(b, 0, b.length);
263             }
264         }
265         if (params.withBlockChecksum) {
266             ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
267             blockHash.reset();
268         }
269         currentIndex = 0;
270     }
271 
272     @Override
273     public void write(final byte[] data, int off, int len) throws IOException {
274         if (params.withContentChecksum) {
275             contentHash.update(data, off, len);
276         }
277         int blockDataRemaining = blockData.length - currentIndex;
278         while (len > 0) {
279             final int copyLen = Math.min(len, blockDataRemaining);
280             System.arraycopy(data, off, blockData, currentIndex, copyLen);
281             off += copyLen;
282             blockDataRemaining -= copyLen;
283             len -= copyLen;
284             currentIndex += copyLen;
285             if (blockDataRemaining == 0) {
286                 flushBlock();
287                 blockDataRemaining = blockData.length;
288             }
289         }
290     }
291 
292     @Override
293     public void write(final int b) throws IOException {
294         oneByte[0] = (byte) (b & 0xff);
295         write(oneByte);
296     }
297 
298     private void writeFrameDescriptor() throws IOException {
299         int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
300         if (!params.withBlockDependency) {
301             flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
302         }
303         if (params.withContentChecksum) {
304             flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
305         }
306         if (params.withBlockChecksum) {
307             flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
308         }
309         out.write(flags);
310         contentHash.update(flags);
311         final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
312         out.write(bd);
313         contentHash.update(bd);
314         out.write((int) (contentHash.getValue() >> 8 & 0xff));
315         contentHash.reset();
316     }
317 
318     private void writeTrailer() throws IOException {
319         out.write(END_MARK);
320         if (params.withContentChecksum) {
321             ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
322         }
323     }
324 
325 }