/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>
 * Based on the "spec" in the version "1.5.1 (31/03/2015)"
 * </p>
 *
 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {

    /**
     * The block sizes supported by the format.
     */
    public enum BlockSize {
        /** Block size of 64K */
        K64(64 * 1024, 4),
        /** Block size of 256K */
        K256(256 * 1024, 5),
        /** Block size of 1M */
        M1(1024 * 1024, 6),
        /** Block size of 4M */
        M4(4096 * 1024, 7);

        private final int size, index;

        BlockSize(final int size, final int index) {
            this.size = size;
            this.index = index;
        }

        /**
         * Gets the numeric code that is written into the BD byte of the frame descriptor for this block size.
         *
         * @return the block size code
         */
        int getIndex() {
            return index;
        }

        /**
         * Gets the block size in bytes.
         *
         * @return the block size in bytes
         */
        int getSize() {
            return size;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {
        /**
         * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
         *
         * <p>
         * This matches the defaults of the lz4 command line utility.
         * </p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
        private final BlockSize blockSize;
        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;

        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * Sets up a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
         * independent blocks.
         *
         * @param blockSize the size of a single block.
         */
        public Parameters(final BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         *
         * @param blockSize           the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
         *                            impossible to decompress the output in parallel.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         *
         * @param blockSize           the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
         *                            impossible to decompress the output in parallel.
         * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
                final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        /**
         * Sets up a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
         * independent blocks.
         *
         * @param blockSize  the size of a single block.
         * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
         */
        public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
                    + ", withBlockDependency " + withBlockDependency;
        }
    }

    private static final byte[] END_MARK = new byte[4];
    // used in one-arg write method
    private final byte[] oneByte = new byte[1];
    // collects uncompressed input until a full block is available (or the stream is finished)
    private final byte[] blockData;
    private final OutputStream out;
    private final Parameters params;

    private boolean finished;

    // used for frame header checksum and content checksum, if requested
    private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
    // used for block checksum, if requested
    private final org.apache.commons.codec.digest.XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;

    // number of valid bytes at the END of blockDependencyBuffer
    private int collectedBlockDependencyBytes;
    // number of bytes currently buffered in blockData
    private int currentIndex;

    /**
     * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
     *
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the LZ4 frame format using the given parameters.
     *
     * <p>
     * The frame signature and the frame descriptor are written to {@code out} immediately.
     * </p>
     *
     * @param out    the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
        this.params = params;
        blockData = new byte[params.blockSize.getSize()];
        this.out = out;
        blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
        blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
    }

    /**
     * Appends uncompressed data to the sliding buffer that is used to prefill the compressor of the next block.
     *
     * <p>
     * The buffer always holds the most recent bytes at its end. If more bytes are offered than the buffer can hold, only the trailing
     * {@code blockDependencyBuffer.length} bytes of the given range are retained, as those are the bytes that immediately precede the next block.
     * </p>
     *
     * @param b   array holding the uncompressed data
     * @param off offset of the first byte of the data inside {@code b}
     * @param len number of bytes of data
     */
    private void appendToBlockDependencyBuffer(final byte[] b, final int off, final int len) {
        final int capacity = blockDependencyBuffer.length;
        final int count = Math.min(len, capacity);
        if (count > 0) {
            final int keep = capacity - count;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, count, blockDependencyBuffer, 0, keep);
            }
            // append the newest count bytes; when len exceeds the capacity the head of the range is skipped so
            // that the buffer ends with the bytes directly preceding the next block
            System.arraycopy(b, off + len - count, blockDependencyBuffer, keep, count);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + count, capacity);
        }
    }

    /**
     * Finishes the frame and closes the underlying stream; the underlying stream is closed even if finishing fails.
     *
     * @throws IOException if an error occurs
     */
    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            out.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream, doesn't close the underlying stream.
     *
     * <p>
     * Calling this method again after it has completed successfully has no effect.
     * </p>
     *
     * @throws IOException if an error occurs
     */
    public void finish() throws IOException {
        if (!finished) {
            flushBlock();
            writeTrailer();
            finished = true;
        }
    }

    /**
     * Compresses the bytes buffered in {@code blockData} and writes them as a single block; does nothing if no bytes are buffered.
     *
     * <p>
     * If the compressed form is larger than the buffered data the block is written uncompressed instead. A block checksum over the bytes actually
     * written is appended if block checksums are enabled.
     * </p>
     *
     * @throws IOException if compressing or writing fails
     */
    private void flushBlock() throws IOException {
        if (currentIndex == 0) {
            return;
        }
        final boolean withBlockDependency = params.withBlockDependency;
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // valid history lives at the end of the dependency buffer
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        final byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Buffers the given data and writes a compressed block each time a full block of data has been collected.
     *
     * @param data array holding the data to write
     * @param off  offset of the first byte to write
     * @param len  number of bytes to write
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final byte[] data, int off, int len) throws IOException {
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        int blockDataRemaining = blockData.length - currentIndex;
        while (len > 0) {
            final int copyLen = Math.min(len, blockDataRemaining);
            System.arraycopy(data, off, blockData, currentIndex, copyLen);
            off += copyLen;
            blockDataRemaining -= copyLen;
            len -= copyLen;
            currentIndex += copyLen;
            if (blockDataRemaining == 0) {
                flushBlock();
                blockDataRemaining = blockData.length;
            }
        }
    }

    /**
     * Writes a single byte.
     *
     * @param b the byte to write, only the low eight bits are used
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Writes the frame descriptor: the flag byte, the BD byte holding the block size code and a one byte header checksum.
     *
     * <p>
     * The header checksum is taken from bits 8 to 15 of the hash over the flag and BD bytes; the hash is reset afterwards so it can be reused
     * for the content checksum.
     * </p>
     *
     * @throws IOException if writing fails
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        out.write((int) (contentHash.getValue() >> 8 & 0xff));
        contentHash.reset();
    }

    /**
     * Writes the four zero bytes of the end mark followed by the content checksum, if content checksums are enabled.
     *
     * @throws IOException if writing fails
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

}