/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.compress.compressors.zstandard;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.lang3.ArrayUtils;

import com.github.luben.zstd.ZstdOutputStream;

/**
 * {@link CompressorOutputStream} implementation to create Zstandard encoded stream.
 * <p>
 * This class avoids making the underlying {@code zstd} classes part of the public or protected API. The underlying implementation is provided through the
 * <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a> library which is based on <a href="https://github.com/facebook/zstd/">zstd</a>.
 * </p>
 *
 * @see <a href="https://github.com/luben/zstd-jni/">Zstandard JNI</a>
 * @see <a href="https://github.com/facebook/zstd/">zstd</a>
 * @since 1.16
 */
public class ZstdCompressorOutputStream extends CompressorOutputStream<ZstdOutputStream> {

    // @formatter:off
    /**
     * Builds a new {@link ZstdCompressorOutputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ZstdCompressorOutputStream s = ZstdCompressorOutputStream.builder()
     *   .setPath(path)
     *   .setLevel(3)
     *   .setStrategy(0)
     *   .setWorkers(0)
     *   .get();
     * }
     * </pre>
     * <p>
     * This class avoids making the underlying {@code zstd} classes part of the public or protected API.
     * </p>
     * @see #get()
     * @see ZstdConstants
     * @since 1.28.0
     */
    // @formatter:on
    public static final class Builder extends AbstractStreamBuilder<ZstdCompressorOutputStream, Builder> {

        // Each field is handed to the ZstdOutputStream setter of the same name when the stream is built.
        private int chainLog;
        private boolean checksum;
        private boolean closeFrameOnFlush;
        private byte[] dict;
        private int hashLog;
        private int jobSize;
        private int level = ZstdConstants.ZSTD_CLEVEL_DEFAULT;
        private int minMatch;
        private int overlapLog;
        private int searchLog;
        private int strategy;
        private int targetLength;
        private int windowLog;
        private int workers;

        /**
         * Constructs a new builder of {@link ZstdCompressorOutputStream}.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@link ZstdCompressorOutputStream} from the current state of this builder.
         *
         * @return a new {@link ZstdCompressorOutputStream}.
         * @throws IOException if an I/O error occurs.
         */
        @Override
        public ZstdCompressorOutputStream get() throws IOException {
            return new ZstdCompressorOutputStream(this);
        }

        /**
         * Sets the size of the multi-probe search table, as a power of 2.
         * <p>
         * The value {@code 0} means use the default chainLog.
         * </p>
         * <p>
         * The resulting memory usage is (in C) {@code (1 << (chainLog + 2))}. The input must be between {@link ZstdConstants#ZSTD_CHAINLOG_MIN} and
         * {@link ZstdConstants#ZSTD_CHAINLOG_MAX}. Larger tables result in better and slower compression. This parameter is useless for "fast" strategy but
         * still useful when using "dfast" strategy, in which case it defines a secondary probe table.
         * </p>
         *
         * @param chainLog the size of the multi-probe search table, as a power of 2.
         * @return this instance.
         * @see ZstdConstants#ZSTD_CHAINLOG_MIN
         * @see ZstdConstants#ZSTD_CHAINLOG_MAX
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setChainLog(final int chainLog) {
            this.chainLog = chainLog;
            return this;
        }

        /**
         * Sets whether a 32-bit checksum of content is written at end of frame (defaults to {@code false}).
         * <p>
         * The value {@code false} means no checksum.
         * </p>
         *
         * @param checksum Whether a 32-bit checksum of content is written at end of frame.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setChecksum(final boolean checksum) {
            this.checksum = checksum;
            return this;
        }

        /**
         * Sets whether to close the frame on flush.
         * <p>
         * This will guarantee that the stream can be read fully if the process crashes before closing the stream. The downside is that this negatively
         * affects the compression ratio.
         * </p>
         * <p>
         * The value {@code false} means don't close on flush.
         * </p>
         *
         * @param closeFrameOnFlush whether to close the frame on flush.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setCloseFrameOnFlush(final boolean closeFrameOnFlush) {
            this.closeFrameOnFlush = closeFrameOnFlush;
            return this;
        }

        /**
         * Sets an internal {@code CDict} from the given {@code dict} buffer.
         * <p>
         * Decompression will have to use same dictionary.
         * </p>
         * <p>
         * <strong>Using a dictionary</strong>
         * </p>
         * <ul>
         * <li>Loading a null (or 0-length) dictionary invalidates the previous dictionary, returning to no-dictionary mode.</li>
         * <li>A dictionary is sticky, it will be used for all future compressed frames. To return to the no-dictionary mode, load a null dictionary.</li>
         * <li>Loading a dictionary builds tables. This is a CPU consuming operation, with non-negligible impact on latency. Tables are dependent on compression
         * parameters, and for this reason, compression parameters can no longer be changed after loading a dictionary.</li>
         * <li>The dictionary content will be copied internally.</li>
         * </ul>
         *
         * @param dict The dictionary buffer, may be {@code null}.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter12">Zstd manual Chapter12</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setDict(final byte[] dict) {
            this.dict = dict;
            return this;
        }

        /**
         * Sets the size of the initial probe table, as a power of 2.
         * <p>
         * The value {@code 0} means "use default hashLog".
         * </p>
         * <p>
         * The resulting memory usage is (in C) {@code (1 << (hashLog + 2))}. This value must be between {@link ZstdConstants#ZSTD_HASHLOG_MIN} and
         * {@link ZstdConstants#ZSTD_HASHLOG_MAX}. Using a larger table improves the compression ratio of strategies {@code <= dFast}, and improves speed of
         * strategies {@code > dFast}.
         * </p>
         *
         * @param hashLog Size of the initial probe table, as a power of 2.
         * @return this instance.
         * @see ZstdConstants#ZSTD_HASHLOG_MIN
         * @see ZstdConstants#ZSTD_HASHLOG_MAX
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setHashLog(final int hashLog) {
            this.hashLog = hashLog;
            return this;
        }

        /**
         * Sets the size of a compression job.
         * <p>
         * This value is enforced only when {@code workers >= 1}. Each compression job is completed in parallel, so this value can indirectly impact the number
         * of active threads. A value of 0 uses a default behavior, which is dynamically determined based on compression parameters. Job size must be a minimum
         * of overlap size, or <a href="https://github.com/facebook/zstd/blob/dev/lib/compress/zstdmt_compress.h">ZSTDMT_JOBSIZE_MIN (= 512 KB)</a>, whichever
         * is largest. The minimum size is automatically and transparently enforced.
         * </p>
         * <p>
         * This is a multi-threading parameter and is only active if multi-threading is enabled (if the underlying native library is compiled with the build
         * macro {@code ZSTD_MULTITHREAD}).
         * </p>
         *
         * @param jobSize Size of a compression job.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/compress/zstdmt_compress.h">zstdmt_compress.h</a>
         */
        public Builder setJobSize(final int jobSize) {
            this.jobSize = jobSize;
            return this;
        }

        /**
         * Sets compression parameters according to a pre-defined {@code cLevel} table.
         * <p>
         * The exact compression parameters are dynamically determined, depending on both compression level and srcSize (when known). The default level is
         * {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. See {@link ZstdConstants#ZSTD_CLEVEL_MIN} and {@link ZstdConstants#ZSTD_CLEVEL_MAX} for the supported
         * range.
         * </p>
         * <ul>
         * <li>The value 0 means use the default, which is controlled by {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}.</li>
         * <li>You may pass a negative compression level.</li>
         * <li>Setting a level does not automatically set all other compression parameters to defaults. Setting this value will eventually dynamically impact
         * the compression parameters which have not been manually set. The manually set values are used.</li>
         * </ul>
         *
         * @param level The compression level, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}.
         * @return this instance
         * @see ZstdConstants#ZSTD_CLEVEL_DEFAULT
         * @see ZstdConstants#ZSTD_CLEVEL_MIN
         * @see ZstdConstants#ZSTD_CLEVEL_MAX
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setLevel(final int level) {
            this.level = level;
            return this;
        }

        /**
         * Sets the minimum size of searched matches.
         * <p>
         * Zstd can still find matches of smaller size, by updating its search algorithm to look for this size and larger. Using larger values increase
         * compression and decompression speed, but decrease the ratio. The value must be between {@link ZstdConstants#ZSTD_MINMATCH_MIN} and
         * {@link ZstdConstants#ZSTD_MINMATCH_MAX}. Note that currently, for all strategies {@code < btopt}, the effective minimum is {@code 4}; for all
         * strategies {@code > fast}, the effective maximum is {@code 6}.
         * </p>
         * <p>
         * The value {@code 0} means use the default minMatchLength.
         * </p>
         *
         * @param minMatch minimum size of searched matches.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setMinMatch(final int minMatch) {
            this.minMatch = minMatch;
            return this;
        }

        /**
         * Sets the overlap size, as a fraction of window size.
         * <p>
         * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. It helps preserve compression ratio, while each job
         * is compressed in parallel. This value is enforced only when {@code workers >= 1}. Larger values increase compression ratio, but decrease speed.
         * Possible values range from 0 to 9:
         * </p>
         * <ul>
         * <li>0 means "default" : value will be determined by the library, depending on strategy</li>
         * <li>1 means "no overlap"</li>
         * <li>9 means "full overlap", using a full window size.</li>
         * </ul>
         * <p>
         * Each intermediate rank increases/decreases the load size by a factor 2:
         * </p>
         * <ul>
         * <li>9: full window</li>
         * <li>8: w / 2</li>
         * <li>7: w / 4</li>
         * <li>6: w / 8</li>
         * <li>5: w / 16</li>
         * <li>4: w / 32</li>
         * <li>3: w / 64</li>
         * <li>2: w / 128</li>
         * <li>1: no overlap</li>
         * <li>0: default</li>
         * </ul>
         * <p>
         * The default value varies between 6 and 9, depending on the strategy.
         * </p>
         * <p>
         * This is a multi-threading parameter and is only active if multi-threading is enabled (if the underlying native library is compiled with the build
         * macro {@code ZSTD_MULTITHREAD}).
         * </p>
         *
         * @param overlapLog the overlap size, as a fraction of window size.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setOverlapLog(final int overlapLog) {
            this.overlapLog = overlapLog;
            return this;
        }

        /**
         * Sets number of search attempts, as a power of 2.
         * <p>
         * More attempts result in better and slower compression. This parameter is useless for "fast" and "dFast" strategies.
         * </p>
         * <p>
         * The value {@code 0} means use the default searchLog.
         * </p>
         *
         * @param searchLog number of search attempts, as a power of 2.
         * @return this instance.
         * @see ZstdConstants#ZSTD_SEARCHLOG_MIN
         * @see ZstdConstants#ZSTD_SEARCHLOG_MAX
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setSearchLog(final int searchLog) {
            this.searchLog = searchLog;
            return this;
        }

        /**
         * Sets the {@code ZSTD_strategy} from the C enum definition.
         * <p>
         * The higher the value of selected strategy, the more complex it is, resulting in stronger and slower compression.
         * </p>
         * <p>
         * The value {@code 0} means use the default strategy.
         * </p>
         * <ul>
         * <li>{@code ZSTD_fast = 1}</li>
         * <li>{@code ZSTD_dfast = 2}</li>
         * <li>{@code ZSTD_greedy = 3}</li>
         * <li>{@code ZSTD_lazy = 4}</li>
         * <li>{@code ZSTD_lazy2 = 5}</li>
         * <li>{@code ZSTD_btlazy2 = 6}</li>
         * <li>{@code ZSTD_btopt = 7}</li>
         * <li>{@code ZSTD_btultra = 8}</li>
         * <li>{@code ZSTD_btultra2 = 9}</li>
         * </ul>
         *
         * @param strategy the {@code ZSTD_strategy} from the C enum definition.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setStrategy(final int strategy) {
            this.strategy = strategy;
            return this;
        }

        /**
         * Sets a value that depends on the strategy, see {@code ZSTD_c_targetLength}.
         * <p>
         * For strategies {@code btopt}, {@code btultra} and {@code btultra2}:
         * </p>
         * <ul>
         * <li>Length of Match considered "good enough" to stop search.</li>
         * <li>Larger values make compression stronger, and slower.</li>
         * </ul>
         * <p>
         * For strategy {@code fast}:
         * </p>
         * <ul>
         * <li>Distance between match sampling.</li>
         * <li>Larger values make compression faster, and weaker.</li>
         * </ul>
         * <p>
         * The value {@code 0} means use the default targetLength.
         * </p>
         *
         * @param targetLength a value that depends on the strategy, see {@code ZSTD_c_targetLength}.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setTargetLength(final int targetLength) {
            this.targetLength = targetLength;
            return this;
        }

        /**
         * Sets maximum allowed back-reference distance, expressed as power of 2.
         * <p>
         * This will set a memory budget for streaming decompression, with larger values requiring more memory and typically compressing more. This value must
         * be between {@link ZstdConstants#ZSTD_WINDOWLOG_MIN} and {@link ZstdConstants#ZSTD_WINDOWLOG_MAX}.
         * </p>
         * <p>
         * <strong>Note</strong>: Using a windowLog greater than {@link ZstdConstants#ZSTD_WINDOWLOG_LIMIT_DEFAULT} requires explicitly allowing such size at
         * streaming decompression stage.
         * </p>
         * <p>
         * The value {@code 0} means use the default windowLog.
         * </p>
         *
         * @param windowLog maximum allowed back-reference distance, expressed as power of 2.
         * @return this instance.
         * @see ZstdConstants#ZSTD_WINDOWLOG_MIN
         * @see ZstdConstants#ZSTD_WINDOWLOG_MAX
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setWindowLog(final int windowLog) {
            this.windowLog = windowLog;
            return this;
        }

        /**
         * Sets how many threads will be spawned to compress in parallel.
         * <p>
         * When {@code workers >= 1}, this triggers asynchronous mode when compressing which consumes input and flushes output if possible, but immediately
         * gives back control to the caller, while compression is performed in parallel, within worker threads. More workers improve speed, but also increase
         * memory usage.
         * </p>
         * <p>
         * The value {@code 0} means "single-threaded mode": nothing is spawned, compression is performed from the calling thread, and all invocations are
         * blocking.
         * </p>
         * <p>
         * This is a multi-threading parameter and is only active if multi-threading is enabled (if the underlying native library is compiled with the build
         * macro {@code ZSTD_MULTITHREAD}).
         * </p>
         *
         * @param workers How many threads will be spawned to compress in parallel.
         * @return this instance.
         * @see <a href="https://facebook.github.io/zstd/zstd_manual.html#Chapter5">Zstd manual Chapter5</a>
         * @see <a href="https://github.com/facebook/zstd/blob/dev/lib/zstd.h">zstd.h</a>
         */
        public Builder setWorkers(final int workers) {
            this.workers = workers;
            return this;
        }
    }

    /**
     * Constructs a new builder of {@link ZstdCompressorOutputStream}.
     *
     * @return a new builder of {@link ZstdCompressorOutputStream}.
     * @since 1.28.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates the {@link ZstdOutputStream} this instance delegates to.
     * <p>
     * If the builder's output stream already is a {@link ZstdOutputStream}, it is returned as is and none of the builder's Zstd properties are applied.
     * Otherwise the builder's output stream is wrapped in a new {@link ZstdOutputStream} and every builder property is applied to it.
     * </p>
     *
     * @param builder the source of the output stream and of the Zstd parameters.
     * @return the {@link ZstdOutputStream} to write to.
     * @throws IOException if an I/O error occurs.
     */
    @SuppressWarnings("resource") // Caller closes
    private static ZstdOutputStream toZstdOutputStream(final Builder builder) throws IOException {
        final OutputStream outputStream = builder.getOutputStream();
        if (outputStream instanceof ZstdOutputStream) {
            // Builder properties are not applied when a ZstdOutputStream is provided.
            return (ZstdOutputStream) outputStream;
        }
        // @formatter:off
        return new ZstdOutputStream(outputStream)
            .setChainLog(builder.chainLog)
            .setChecksum(builder.checksum)
            .setCloseFrameOnFlush(builder.closeFrameOnFlush)
            // An unset dictionary is passed on as an empty array rather than null.
            .setDict(builder.dict != null ? builder.dict : ArrayUtils.EMPTY_BYTE_ARRAY)
            .setHashLog(builder.hashLog)
            .setJobSize(builder.jobSize)
            .setLevel(builder.level)
            .setMinMatch(builder.minMatch)
            .setOverlapLog(builder.overlapLog)
            .setSearchLog(builder.searchLog)
            .setStrategy(builder.strategy)
            .setTargetLength(builder.targetLength)
            .setWindowLog(builder.windowLog)
            .setWorkers(builder.workers);
        // @formatter:on
    }

    /**
     * Constructs a new instance from the given builder.
     *
     * @param builder the source of the output stream and of the Zstd parameters.
     * @throws IOException if an I/O error occurs.
     */
    @SuppressWarnings("resource") // Caller closes
    private ZstdCompressorOutputStream(final Builder builder) throws IOException {
        super(toZstdOutputStream(builder));
    }

    /**
     * Constructs a new instance using default Zstd parameter values.
     *
     * @param outStream the output stream.
     * @throws IOException if an I/O error occurs.
     */
    public ZstdCompressorOutputStream(final OutputStream outStream) throws IOException {
        this(builder().setOutputStream(outStream));
    }

    /**
     * Constructs a new instance using default Zstd parameter values plus a compression level.
     *
     * @param outStream the output stream.
     * @param level     The compression level, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}.
     * @throws IOException if an I/O error occurs.
     * @since 1.18
     * @deprecated Use {@link #builder()}.
     */
    @Deprecated
    public ZstdCompressorOutputStream(final OutputStream outStream, final int level) throws IOException {
        this(builder().setOutputStream(outStream).setLevel(level));
    }

    /**
     * Constructs a new instance using default Zstd parameter values plus a compression level and closeFrameOnFlush setting.
     *
     * @param outStream         the output stream.
     * @param level             The compression level, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}.
     * @param closeFrameOnFlush whether to close the frame on flush.
     * @throws IOException if an I/O error occurs.
     * @since 1.18
     * @deprecated Use {@link #builder()}.
     */
    @Deprecated
    public ZstdCompressorOutputStream(final OutputStream outStream, final int level, final boolean closeFrameOnFlush) throws IOException {
        this(builder().setOutputStream(outStream).setLevel(level).setCloseFrameOnFlush(closeFrameOnFlush));
    }

    /**
     * Constructs a new instance using default Zstd parameter values plus a compression level, closeFrameOnFlush and checksum settings.
     *
     * @param outStream         the output stream.
     * @param level             The compression level, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}.
     * @param closeFrameOnFlush whether to close the frame on flush.
     * @param checksum          Whether a 32-bit checksum of content is written at end of frame.
     * @throws IOException if an I/O error occurs.
     * @since 1.18
     * @deprecated Use {@link #builder()}.
     */
    @Deprecated
    public ZstdCompressorOutputStream(final OutputStream outStream, final int level, final boolean closeFrameOnFlush, final boolean checksum)
            throws IOException {
        this(builder().setOutputStream(outStream).setLevel(level).setCloseFrameOnFlush(closeFrameOnFlush).setChecksum(checksum));
    }

    /**
     * Writes {@code len} bytes from {@code buf}, starting at offset {@code off}, by delegating to the wrapped stream's {@code write(byte[], int, int)}.
     *
     * @param buf the data.
     * @param off the start offset in the data.
     * @param len the number of bytes to write.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] buf, final int off, final int len) throws IOException {
        out.write(buf, off, len);
    }
}