/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>
 * Based on the "spec" in the version "1.5.1 (31/03/2015)"
 * </p>
 *
 * <p>
 * Written data is collected in a buffer of the configured block size; each time the buffer is full (and once more when the stream is finished) the
 * collected data is compressed and written as one block.
 * </p>
 *
 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream<OutputStream> {

    /**
     * Enumerates the block sizes supported by the format.
     */
    public enum BlockSize {

        /** Block size of 64K. */
        K64(64 * 1024, 4),

        /** Block size of 256K. */
        K256(256 * 1024, 5),

        /** Block size of 1M. */
        M1(1024 * 1024, 6),

        /** Block size of 4M. */
        M4(4096 * 1024, 7);

        /** Size of a block in bytes. */
        private final int size;

        /** Value written (shifted) into the BD byte of the frame descriptor to identify this block size. */
        private final int index;

        BlockSize(final int size, final int index) {
            this.size = size;
            this.index = index;
        }

        int getIndex() {
            return index;
        }

        int getSize() {
            return size;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {

        /**
         * The default parameters of 4M block size, enabled content checksum, disabled block checksums and independent blocks.
         *
         * <p>
         * This matches the defaults of the lz4 command line utility.
         * </p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
        private final BlockSize blockSize;
        private final boolean withContentChecksum;
        private final boolean withBlockChecksum;
        private final boolean withBlockDependency;

        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * Sets up a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
         * independent blocks.
         *
         * @param blockSize the size of a single block.
         */
        public Parameters(final BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         *
         * @param blockSize           the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
         *                            impossible to decompress the output in parallel.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency, BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         *
         * @param blockSize           the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum   whether to write a block checksum. Note that block checksums are not supported by the lz4 command line utility
         * @param withBlockDependency whether a block may depend on the content of a previous block. Enabling this may improve compression ratio but makes it
         *                            impossible to decompress the output in parallel.
         * @param lz77params          parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum, final boolean withBlockDependency,
                final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        /**
         * Sets up a custom block size for the LZ4 stream but otherwise uses the defaults of enabled content checksum, disabled block checksums and
         * independent blocks.
         *
         * @param blockSize  the size of a single block.
         * @param lz77params parameters used to fine-tune compression, in particular to balance compression ratio vs compression speed.
         */
        public Parameters(final BlockSize blockSize, final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum + ", withBlockChecksum " + withBlockChecksum
                    + ", withBlockDependency " + withBlockDependency;
        }
    }

    /** Four zero bytes written after the last block to terminate the frame. */
    private static final byte[] END_MARK = new byte[4];
    // used in one-arg write method
    private final byte[] oneByte = new byte[1];
    // collects uncompressed data until a full block is available
    private final byte[] blockData;
    private final Parameters params;

    // used for frame header checksum and content checksum, if requested
    private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
    // used for block checksum, if requested
    private final org.apache.commons.codec.digest.XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;

    // number of valid bytes held at the END of blockDependencyBuffer
    private int collectedBlockDependencyBytes;
    // number of bytes currently collected in blockData
    private int currentIndex;

    /**
     * Constructs a new output stream that compresses data using the LZ4 frame format using the default block size of 4MB.
     *
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the LZ4 frame format using the given parameters.
     *
     * <p>
     * The frame signature and the frame descriptor are written to {@code out} immediately.
     * </p>
     *
     * @param out    the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
        super(out);
        this.params = params;
        blockData = new byte[params.blockSize.getSize()];
        blockHash = params.withBlockChecksum ? new org.apache.commons.codec.digest.XXHash32() : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
        blockDependencyBuffer = params.withBlockDependency ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE] : null;
    }

    /**
     * Remembers the most recently written uncompressed bytes so they can be used to prefill the compressor of the next block.
     *
     * <p>
     * The buffer is kept right-aligned: the newest data always ends at the end of {@code blockDependencyBuffer}. If more data is offered than the buffer can
     * hold, only the trailing bytes are retained as those are the ones that immediately precede the next block.
     * </p>
     *
     * @param b   array holding the data
     * @param off offset of the first byte of the data inside {@code b}
     * @param len number of bytes of data
     */
    private void appendToBlockDependencyBuffer(final byte[] b, int off, int len) {
        final int capacity = blockDependencyBuffer.length;
        if (len > capacity) {
            // More data than fits: skip ahead so that the LAST capacity bytes are kept. Keeping the first bytes instead
            // would prefill the next block with data that does not directly precede it and corrupt its back-references.
            off += len - capacity;
            len = capacity;
        }
        if (len > 0) {
            final int keep = capacity - len;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
            }
            // append new data
            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, capacity);
        }
    }

    /**
     * Finishes the frame and then closes this stream via {@code super.close()}, even if finishing fails.
     *
     * @throws IOException if an error occurs
     */
    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            super.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream, doesn't close the underlying stream.
     *
     * <p>
     * Calling this method on a stream that is already finished has no effect.
     * </p>
     *
     * @throws IOException if an error occurs
     */
    @Override
    public void finish() throws IOException {
        if (!isFinished()) {
            flushBlock();
            writeTrailer();
            super.finish();
        }
    }

    /**
     * Writes the data collected so far as a single block and resets the collection buffer.
     *
     * <p>
     * Does nothing if no data has been collected. The data is written uncompressed if compressing it would make it bigger.
     * </p>
     *
     * @throws IOException if compressing or writing fails
     */
    private void flushBlock() throws IOException {
        if (currentIndex == 0) {
            return;
        }
        final boolean withBlockDependency = params.withBlockDependency;
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // the valid bytes live at the end of the dependency buffer
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes, collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        final byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK, 4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            // the checksum covers the block bytes exactly as they have been written above
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Collects the given data and writes a block each time a full block's worth of data has been collected.
     *
     * @param data array holding the data to write
     * @param off  offset of the first byte to write
     * @param len  number of bytes to write
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final byte[] data, int off, int len) throws IOException {
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        int blockDataRemaining = blockData.length - currentIndex;
        while (len > 0) {
            final int copyLen = Math.min(len, blockDataRemaining);
            System.arraycopy(data, off, blockData, currentIndex, copyLen);
            off += copyLen;
            blockDataRemaining -= copyLen;
            len -= copyLen;
            currentIndex += copyLen;
            if (blockDataRemaining == 0) {
                flushBlock();
                blockDataRemaining = blockData.length;
            }
        }
    }

    /**
     * Writes a single byte by delegating to the array based write method.
     *
     * @param b the byte to write, only the lowest eight bits are used
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Writes the frame descriptor consisting of the FLG byte, the BD byte and the header checksum byte.
     *
     * @throws IOException if writing fails
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        // the header checksum is the second byte of the hash of the descriptor bytes
        out.write((int) (contentHash.getValue() >> 8 & 0xff));
        // the same hash instance is reused for the content checksum, start from scratch
        contentHash.reset();
    }

    /**
     * Writes the end mark and, if requested by the parameters, the content checksum.
     *
     * @throws IOException if writing fails
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

}