001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026import org.apache.commons.compress.utils.ByteUtils;
027
028/**
029 * CompressorOutputStream for the LZ4 frame format.
030 *
031 * <p>
032 * Based on the "spec" in the version "1.5.1 (31/03/2015)"
033 * </p>
034 *
035 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
036 * @since 1.14
037 * @NotThreadSafe
038 */
039public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
040
041    /**
042     * The block sizes supported by the format.
043     */
044    public enum BlockSize {
045        /** Block size of 64K */
046        K64(64 * 1024, 4),
047        /** Block size of 256K */
048        K256(256 * 1024, 5),
049        /** Block size of 1M */
050        M1(1024 * 1024, 6),
051        /** Block size of 4M */
052        M4(4096 * 1024, 7);
053
054        private final int size, index;
055
056        BlockSize(final int size, final int index) {
057            this.size = size;
058            this.index = index;
059        }
060
061        int getIndex() {
062            return index;
063        }
064
065        int getSize() {
066            return size;
067        }
068    }
069
070    /**
071     * Parameters of the LZ4 frame format.
072     */
073    public static class Parameters {
074        /**
075         * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
076         *
077         * <p>
078         * This matches the defaults of the lz4 command line utility.
079         * </p>
080         */
081        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
082        private final BlockSize blockSize;
083        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
084
085        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
086
087        /**
088         * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
089         * independent blocks.
090         *
091         * @param blockSize the size of a single block.
092         */
093        public Parameters(final BlockSize blockSize) {
094            this(blockSize, true, false, false);
095        }
096
097        /**
098         * Sets up custom parameters for the LZ4 stream.
099         *
100         * @param blockSize           the size of a single block.
101         * @param withContentChecksum whether to write a content checksum
102         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
103         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
104         *                            impossible to decompress the output in parallel.
105         */
106        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
107            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
108        }
109
110        /**
111         * Sets up custom parameters for the LZ4 stream.
112         *
113         * @param blockSize           the size of a single block.
114         * @param withContentChecksum whether to write a content checksum
115         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
116         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
117         *                            impossible to decompress the output in parallel.
118         * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
119         */
120        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
121                final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
122            this.blockSize = blockSize;
123            this.withContentChecksum = withContentChecksum;
124            this.withBlockChecksum = withBlockChecksum;
125            this.withBlockDependency = withBlockDependency;
126            this.lz77params = lz77params;
127        }
128
129        /**
130         * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
131         * independent blocks.
132         *
133         * @param blockSize  the size of a single block.
134         * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
135         */
136        public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
137            this(blockSize, true, false, false, lz77params);
138        }
139
140        @Override
141        public String toString() {
142            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
143                    + ", withBlockDependency " + withBlockDependency;
144        }
145    }
146
    // end-of-frame marker written at the start of the trailer: four zero bytes
    private static final byte[] END_MARK = new byte[4];
    // scratch array reused by the one-arg write method so a single byte can be routed through the array-based write
    private final byte[] oneByte = new byte[1];
    // uncompressed bytes collected for the block currently being filled; its length is the configured block size
    private final byte[] blockData;
    // the stream the frame is written to
    private final OutputStream out;
    // the frame configuration supplied at construction time
    private final Parameters params;

    // set once finish() has flushed the last block and written the trailer
    private boolean finished;

    // used for frame header checksum and content checksum, if requested
    private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
    // used for block checksum, if requested; null when block checksums are disabled
    private final org.apache.commons.codec.digest.XXHash32 blockHash;

    // only created if the config requires block dependency; null otherwise
    private final byte[] blockDependencyBuffer;

    // number of valid bytes at the end of blockDependencyBuffer, never more than the buffer's length
    private int collectedBlockDependencyBytes;
    // number of bytes of blockData that are currently filled and still waiting to be flushed
    private int currentIndex;
166
167    /**
168     * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
169     *
170     * @param out the OutputStream to which to write the compressed data
171     * @throws IOException if writing the signature fails
172     */
173    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
174        this(out, Parameters.DEFAULT);
175    }
176
177    /**
178     * Constructs a new output stream that compresses data using the LZ4 frame format using the given block size.
179     *
180     * @param out    the OutputStream to which to write the compressed data
181     * @param params the parameters to use
182     * @throws IOException if writing the signature fails
183     */
184    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
185        this.params = params;
186        blockData = new byte[params.blockSize.getSize()];
187        this.out = out;
188        blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
189        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
190        writeFrameDescriptor();
191        blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
192    }
193
194    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
195        len = Math.min(len, blockDependencyBuffer.length);
196        if (len > 0) {
197            final int keep = blockDependencyBuffer.length - len;
198            if (keep > 0) {
199                // move last keep bytes towards the start of the buffer
200                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
201            }
202            // append new data
203            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
204            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, blockDependencyBuffer.length);
205        }
206    }
207
208    @Override
209    public void close() throws IOException {
210        try {
211            finish();
212        } finally {
213            out.close();
214        }
215    }
216
217    /**
218     * Compresses all blockDataRemaining data and writes it to the stream, doesn't close the underlying stream.
219     *
220     * @throws IOException if an error occurs
221     */
222    public void finish() throws IOException {
223        if (!finished) {
224            flushBlock();
225            writeTrailer();
226            finished = true;
227        }
228    }
229
230    private void flushBlock() throws IOException {
231        if (currentIndex == 0) {
232            return;
233        }
234        final boolean withBlockDependency = params.withBlockDependency;
235        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
236        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
237            if (withBlockDependency) {
238                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
239            }
240            o.write(blockData, 0, currentIndex);
241        }
242        if (withBlockDependency) {
243            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
244        }
245        final byte[] b = baos.toByteArray();
246        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
247            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
248            out.write(blockData, 0, currentIndex);
249            if (params.withBlockChecksum) {
250                blockHash.update(blockData, 0, currentIndex);
251            }
252        } else {
253            ByteUtils.toLittleEndian(out, b.length, 4);
254            out.write(b);
255            if (params.withBlockChecksum) {
256                blockHash.update(b, 0, b.length);
257            }
258        }
259        if (params.withBlockChecksum) {
260            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
261            blockHash.reset();
262        }
263        currentIndex = 0;
264    }
265
266    @Override
267    public void write(final byte[] data, int off, int len) throws IOException {
268        if (params.withContentChecksum) {
269            contentHash.update(data, off, len);
270        }
271        int blockDataRemaining = blockData.length - currentIndex;
272        while (len > 0) {
273            final int copyLen = Math.min(len, blockDataRemaining);
274            System.arraycopy(data, off, blockData, currentIndex, copyLen);
275            off += copyLen;
276            blockDataRemaining -= copyLen;
277            len -= copyLen;
278            currentIndex += copyLen;
279            if (blockDataRemaining == 0) {
280                flushBlock();
281                blockDataRemaining = blockData.length;
282            }
283        }
284    }
285
286    @Override
287    public void write(final int b) throws IOException {
288        oneByte[0] = (byte) (b & 0xff);
289        write(oneByte);
290    }
291
292    private void writeFrameDescriptor() throws IOException {
293        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
294        if (!params.withBlockDependency) {
295            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
296        }
297        if (params.withContentChecksum) {
298            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
299        }
300        if (params.withBlockChecksum) {
301            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
302        }
303        out.write(flags);
304        contentHash.update(flags);
305        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
306        out.write(bd);
307        contentHash.update(bd);
308        out.write((int) (contentHash.getValue() >> 8 & 0xff));
309        contentHash.reset();
310    }
311
312    private void writeTrailer() throws IOException {
313        out.write(END_MARK);
314        if (params.withContentChecksum) {
315            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
316        }
317    }
318
319}