001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026import org.apache.commons.compress.utils.ByteUtils;
027
028/**
029 * CompressorOutputStream for the LZ4 frame format.
030 *
031 * <p>
032 * Based on the "spec" in the version "1.5.1 (31/03/2015)"
033 * </p>
034 *
035 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
036 * @since 1.14
037 * @NotThreadSafe
038 */
039public class FramedLZ4CompressorOutputStream extends CompressorOutputStream<OutputStream> {
040
041    /**
042     * Enumerates the block sizes supported by the format.
043     */
044    public enum BlockSize {
045
046        /** Block size of 64K. */
047        K64(64 * 1024, 4),
048
049        /** Block size of 256K. */
050        K256(256 * 1024, 5),
051
052        /** Block size of 1M. */
053        M1(1024 * 1024, 6),
054
055        /** Block size of 4M. */
056        M4(4096 * 1024, 7);
057
058        private final int size;
059        private final int index;
060
061        BlockSize(final int size, final int index) {
062            this.size = size;
063            this.index = index;
064        }
065
066        int getIndex() {
067            return index;
068        }
069
070        int getSize() {
071            return size;
072        }
073    }
074
075    /**
076     * Parameters of the LZ4 frame format.
077     */
078    public static class Parameters {
079
080        /**
081         * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
082         *
083         * <p>
084         * This matches the defaults of the lz4 command line utility.
085         * </p>
086         */
087        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
088        private final BlockSize blockSize;
089        private final boolean withContentChecksum;
090        private final boolean withBlockChecksum;
091        private final boolean withBlockDependency;
092
093        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
094
095        /**
096         * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
097         * independent blocks.
098         *
099         * @param blockSize the size of a single block.
100         */
101        public Parameters(final BlockSize blockSize) {
102            this(blockSize, true, false, false);
103        }
104
105        /**
106         * Sets up custom parameters for the LZ4 stream.
107         *
108         * @param blockSize           the size of a single block.
109         * @param withContentChecksum whether to write a content checksum
110         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
111         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
112         *                            impossible to decompress the output in parallel.
113         */
114        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
115            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
116        }
117
118        /**
119         * Sets up custom parameters for the LZ4 stream.
120         *
121         * @param blockSize           the size of a single block.
122         * @param withContentChecksum whether to write a content checksum
123         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
124         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
125         *                            impossible to decompress the output in parallel.
126         * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
127         */
128        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
129                final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
130            this.blockSize = blockSize;
131            this.withContentChecksum = withContentChecksum;
132            this.withBlockChecksum = withBlockChecksum;
133            this.withBlockDependency = withBlockDependency;
134            this.lz77params = lz77params;
135        }
136
137        /**
138         * Sets up custom a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
139         * independent blocks.
140         *
141         * @param blockSize  the size of a single block.
142         * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
143         */
144        public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
145            this(blockSize, true, false, false, lz77params);
146        }
147
148        @Override
149        public String toString() {
150            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
151                    + ", withBlockDependency " + withBlockDependency;
152        }
153    }
154
155    private static final byte[] END_MARK = new byte[4];
156    // used in one-arg write method
157    private final byte[] oneByte = new byte[1];
158    private final byte[] blockData;
159    private final Parameters params;
160
161    // used for frame header checksum and content checksum, if requested
162    private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
163    // used for block checksum, if requested
164    private final org.apache.commons.codec.digest.XXHash32 blockHash;
165
166    // only created if the config requires block dependency
167    private final byte[] blockDependencyBuffer;
168
169    private int collectedBlockDependencyBytes;
170    private int currentIndex;
171
172    /**
173     * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
174     *
175     * @param out the OutputStream to which to write the compressed data
176     * @throws IOException if writing the signature fails
177     */
178    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
179        this(out, Parameters.DEFAULT);
180    }
181
182    /**
183     * Constructs a new output stream that compresses data using the LZ4 frame format using the given block size.
184     *
185     * @param out    the OutputStream to which to write the compressed data
186     * @param params the parameters to use
187     * @throws IOException if writing the signature fails
188     */
189    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
190        super(out);
191        this.params = params;
192        blockData = new byte[params.blockSize.getSize()];
193        blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
194        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
195        writeFrameDescriptor();
196        blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
197    }
198
199    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
200        len = Math.min(len, blockDependencyBuffer.length);
201        if (len > 0) {
202            final int keep = blockDependencyBuffer.length - len;
203            if (keep > 0) {
204                // move last keep bytes towards the start of the buffer
205                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
206            }
207            // append new data
208            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
209            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, blockDependencyBuffer.length);
210        }
211    }
212
213    @Override
214    public void close() throws IOException {
215        try {
216            finish();
217        } finally {
218            super.close();
219        }
220    }
221
222    /**
223     * Compresses all blockDataRemaining data and writes it to the stream, doesn't close the underlying stream.
224     *
225     * @throws IOException if an error occurs
226     */
227    @Override
228    public void finish() throws IOException {
229        if (!isFinished()) {
230            flushBlock();
231            writeTrailer();
232            super.finish();
233        }
234    }
235
236    private void flushBlock() throws IOException {
237        if (currentIndex == 0) {
238            return;
239        }
240        final boolean withBlockDependency = params.withBlockDependency;
241        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
242        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
243            if (withBlockDependency) {
244                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
245            }
246            o.write(blockData, 0, currentIndex);
247        }
248        if (withBlockDependency) {
249            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
250        }
251        final byte[] b = baos.toByteArray();
252        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
253            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
254            out.write(blockData, 0, currentIndex);
255            if (params.withBlockChecksum) {
256                blockHash.update(blockData, 0, currentIndex);
257            }
258        } else {
259            ByteUtils.toLittleEndian(out, b.length, 4);
260            out.write(b);
261            if (params.withBlockChecksum) {
262                blockHash.update(b, 0, b.length);
263            }
264        }
265        if (params.withBlockChecksum) {
266            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
267            blockHash.reset();
268        }
269        currentIndex = 0;
270    }
271
272    @Override
273    public void write(final byte[] data, int off, int len) throws IOException {
274        if (params.withContentChecksum) {
275            contentHash.update(data, off, len);
276        }
277        int blockDataRemaining = blockData.length - currentIndex;
278        while (len > 0) {
279            final int copyLen = Math.min(len, blockDataRemaining);
280            System.arraycopy(data, off, blockData, currentIndex, copyLen);
281            off += copyLen;
282            blockDataRemaining -= copyLen;
283            len -= copyLen;
284            currentIndex += copyLen;
285            if (blockDataRemaining == 0) {
286                flushBlock();
287                blockDataRemaining = blockData.length;
288            }
289        }
290    }
291
292    @Override
293    public void write(final int b) throws IOException {
294        oneByte[0] = (byte) (b & 0xff);
295        write(oneByte);
296    }
297
298    private void writeFrameDescriptor() throws IOException {
299        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
300        if (!params.withBlockDependency) {
301            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
302        }
303        if (params.withContentChecksum) {
304            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
305        }
306        if (params.withBlockChecksum) {
307            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
308        }
309        out.write(flags);
310        contentHash.update(flags);
311        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
312        out.write(bd);
313        contentHash.update(bd);
314        out.write((int) (contentHash.getValue() >> 8 & 0xff));
315        contentHash.reset();
316    }
317
318    private void writeTrailer() throws IOException {
319        out.write(END_MARK);
320        if (params.withContentChecksum) {
321            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
322        }
323    }
324
325}