View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import java.io.ByteArrayOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  
25  import org.apache.commons.compress.compressors.CompressorOutputStream;
26  import org.apache.commons.compress.utils.ByteUtils;
27  
28  /**
29   * CompressorOutputStream for the LZ4 frame format.
30   *
31   * <p>
32   * Based on the "spec" in the version "1.5.1 (31/03/2015)"
33   * </p>
34   *
35   * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
36   * @since 1.14
37   * @NotThreadSafe
38   */
39  public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
40  
41      /**
42       * The block sizes supported by the format.
43       */
44      public enum BlockSize {
45          /** Block size of 64K */
46          K64(64 * 1024, 4),
47          /** Block size of 256K */
48          K256(256 * 1024, 5),
49          /** Block size of 1M */
50          M1(1024 * 1024, 6),
51          /** Block size of 4M */
52          M4(4096 * 1024, 7);
53  
54          private final int size, index;
55  
56          BlockSize(final int size, final int index) {
57              this.size = size;
58              this.index = index;
59          }
60  
61          int getIndex() {
62              return index;
63          }
64  
65          int getSize() {
66              return size;
67          }
68      }
69  
70      /**
71       * Parameters of the LZ4 frame format.
72       */
73      public static class Parameters {
74          /**
75           * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
76           *
77           * <p>
78           * This matches the defaults of the lz4 command line utility.
79           * </p>
80           */
81          public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
82          private final BlockSize blockSize;
83          private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
84  
85          private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
86  
87          /**
88           * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
89           * independent blocks.
90           *
91           * @param blockSize the size of a single block.
92           */
93          public Parameters(final BlockSize blockSize) {
94              this(blockSize, true, false, false);
95          }
96  
97          /**
98           * Sets up custom parameters for the LZ4 stream.
99           *
100          * @param blockSize           the size of a single block.
101          * @param withContentChecksum whether to write a content checksum
102          * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
103          * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
104          *                            impossible to decompress the output in parallel.
105          */
106         public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
107             this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
108         }
109 
110         /**
111          * Sets up custom parameters for the LZ4 stream.
112          *
113          * @param blockSize           the size of a single block.
114          * @param withContentChecksum whether to write a content checksum
115          * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
116          * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
117          *                            impossible to decompress the output in parallel.
118          * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
119          */
120         public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
121                 final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
122             this.blockSize = blockSize;
123             this.withContentChecksum = withContentChecksum;
124             this.withBlockChecksum = withBlockChecksum;
125             this.withBlockDependency = withBlockDependency;
126             this.lz77params = lz77params;
127         }
128 
129         /**
130          * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
131          * independent blocks.
132          *
133          * @param blockSize  the size of a single block.
134          * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
135          */
136         public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
137             this(blockSize, true, false, false, lz77params);
138         }
139 
140         @Override
141         public String toString() {
142             return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
143                     + ", withBlockDependency " + withBlockDependency;
144         }
145     }
146 
147     private static final byte[] END_MARK = new byte[4];
148     // used in one-arg write method
149     private final byte[] oneByte = new byte[1];
150     private final byte[] blockData;
151     private final OutputStream out;
152     private final Parameters params;
153 
154     private boolean finished;
155 
156     // used for frame header checksum and content checksum, if requested
157     private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
158     // used for block checksum, if requested
159     private final org.apache.commons.codec.digest.XXHash32 blockHash;
160 
161     // only created if the config requires block dependency
162     private final byte[] blockDependencyBuffer;
163 
164     private int collectedBlockDependencyBytes;
165     private int currentIndex;
166 
167     /**
168      * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
169      *
170      * @param out the OutputStream to which to write the compressed data
171      * @throws IOException if writing the signature fails
172      */
173     public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
174         this(out, Parameters.DEFAULT);
175     }
176 
177     /**
178      * Constructs a new output stream that compresses data using the LZ4 frame format using the given block size.
179      *
180      * @param out    the OutputStream to which to write the compressed data
181      * @param params the parameters to use
182      * @throws IOException if writing the signature fails
183      */
184     public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
185         this.params = params;
186         blockData = new byte[params.blockSize.getSize()];
187         this.out = out;
188         blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
189         out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
190         writeFrameDescriptor();
191         blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
192     }
193 
194     private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
195         len = Math.min(len, blockDependencyBuffer.length);
196         if (len > 0) {
197             final int keep = blockDependencyBuffer.length - len;
198             if (keep > 0) {
199                 // move last keep bytes towards the start of the buffer
200                 System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
201             }
202             // append new data
203             System.arraycopy(b, off, blockDependencyBuffer, keep, len);
204             collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, blockDependencyBuffer.length);
205         }
206     }
207 
208     @Override
209     public void close() throws IOException {
210         try {
211             finish();
212         } finally {
213             out.close();
214         }
215     }
216 
217     /**
218      * Compresses all blockDataRemaining data and writes it to the stream, doesn't close the underlying stream.
219      *
220      * @throws IOException if an error occurs
221      */
222     public void finish() throws IOException {
223         if (!finished) {
224             flushBlock();
225             writeTrailer();
226             finished = true;
227         }
228     }
229 
230     private void flushBlock() throws IOException {
231         if (currentIndex == 0) {
232             return;
233         }
234         final boolean withBlockDependency = params.withBlockDependency;
235         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
236         try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
237             if (withBlockDependency) {
238                 o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
239             }
240             o.write(blockData, 0, currentIndex);
241         }
242         if (withBlockDependency) {
243             appendToBlockDependencyBuffer(blockData, 0, currentIndex);
244         }
245         final byte[] b = baos.toByteArray();
246         if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
247             ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
248             out.write(blockData, 0, currentIndex);
249             if (params.withBlockChecksum) {
250                 blockHash.update(blockData, 0, currentIndex);
251             }
252         } else {
253             ByteUtils.toLittleEndian(out, b.length, 4);
254             out.write(b);
255             if (params.withBlockChecksum) {
256                 blockHash.update(b, 0, b.length);
257             }
258         }
259         if (params.withBlockChecksum) {
260             ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
261             blockHash.reset();
262         }
263         currentIndex = 0;
264     }
265 
266     @Override
267     public void write(final byte[] data, int off, int len) throws IOException {
268         if (params.withContentChecksum) {
269             contentHash.update(data, off, len);
270         }
271         int blockDataRemaining = blockData.length - currentIndex;
272         while (len > 0) {
273             final int copyLen = Math.min(len, blockDataRemaining);
274             System.arraycopy(data, off, blockData, currentIndex, copyLen);
275             off += copyLen;
276             blockDataRemaining -= copyLen;
277             len -= copyLen;
278             currentIndex += copyLen;
279             if (blockDataRemaining == 0) {
280                 flushBlock();
281                 blockDataRemaining = blockData.length;
282             }
283         }
284     }
285 
286     @Override
287     public void write(final int b) throws IOException {
288         oneByte[0] = (byte) (b & 0xff);
289         write(oneByte);
290     }
291 
292     private void writeFrameDescriptor() throws IOException {
293         int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
294         if (!params.withBlockDependency) {
295             flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
296         }
297         if (params.withContentChecksum) {
298             flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
299         }
300         if (params.withBlockChecksum) {
301             flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
302         }
303         out.write(flags);
304         contentHash.update(flags);
305         final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
306         out.write(bd);
307         contentHash.update(bd);
308         out.write((int) (contentHash.getValue() >> 8 & 0xff));
309         contentHash.reset();
310     }
311 
312     private void writeTrailer() throws IOException {
313         out.write(END_MARK);
314         if (params.withContentChecksum) {
315             ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
316         }
317     }
318 
319 }