1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.commons.compress.compressors.gzip;
21
22 import java.io.OutputStream;
23 import java.nio.charset.Charset;
24 import java.time.Instant;
25 import java.util.Objects;
26 import java.util.zip.Deflater;
27
28 import org.apache.commons.io.Charsets;
29 import org.apache.commons.lang3.ArrayUtils;
30 import org.apache.commons.lang3.StringUtils;
31
32 /**
33 * Parameters for the GZIP compressor.
34 *
35 * @see GzipCompressorInputStream
36 * @see GzipCompressorOutputStream
37 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
38 * @since 1.7
39 */
40 public class GzipParameters {
41
42 /**
43 * Enumerates OS types.
44 * <ul>
45 * <li>0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)</li>
46 * <li>1 - Amiga</li>
47 * <li>2 - VMS (or OpenVMS)</li>
48 * <li>3 - Unix</li>
49 * <li>4 - VM/CMS</li>
50 * <li>5 - Atari TOS</li>
51 * <li>6 - HPFS filesystem (OS/2, NT)</li>
52 * <li>7 - Macintosh</li>
53 * <li>8 - Z-System</li>
54 * <li>9 - CP/M</li>
55 * <li>10 - TOPS-20</li>
56 * <li>11 - NTFS filesystem (NT)</li>
57 * <li>12 - QDOS</li>
58 * <li>13 - Acorn RISCOS</li>
59 * <li>255 - unknown</li>
60 * </ul>
61 *
62 * @see <a href="https://datatracker.ietf.org/doc/html/rfc1952#page-7">RFC 1952: GZIP File Format Specification - OS (Operating System)</a>
63 * @since 1.28.0
64 */
65 public enum OS {
66
67 /**
68 * 13: Acorn RISCOS.
69 */
70 ACORN_RISCOS(OS_ACORN_RISCOS),
71
72 /**
73 * 1: Amiga.
74 */
75 AMIGA(OS_AMIGA),
76
77 /**
78 * 5: Atari TOS.
79 */
80 ATARI_TOS(OS_ATARI_TOS),
81
82 /**
83 * 9: CP/M.
84 */
85 CPM(OS_CPM),
86
87 // @formatter:off
88 /**
89 * 0: FAT filesystem (MS-DOS, OS/2, NT/Win32).
90 */
91 FAT(OS_FAT),
92
93 /**
94 * 6: HPFS filesystem (OS/2, NT).
95 */
96 HPFS(OS_HPFS),
97
98 /**
99 * 7: Macintosh.
100 */
101 MACINTOSH(OS_MACINTOSH),
102
103 /**
104 * 11: NTFS filesystem (NT).
105 */
106 NTFS(OS_NTFS),
107
108 /**
109 * 12: QDOS.
110 */
111 QDOS(OS_QDOS),
112
113 /**
114 * 10: TOPS-20.
115 */
116 TOPS_20(OS_TOPS_20),
117
118 /**
119 * 3: Unix.
120 */
121 UNIX(OS_UNIX),
122
123 /**
124 * 255: unknown.
125 */
126 UNKNOWN(OS_UNKNOWN),
127
128 /**
129 * 4: VM/CMS.
130 */
131 VM_CMS(OS_VM_CMS),
132
133 /**
134 * 2: VMS (or OpenVMS).
135 */
136 VMS(OS_VMS),
137
138 /**
139 * 8: Z-System.
140 */
141 Z_SYSTEM(OS_Z_SYSTEM);
142 // @formatter:on
143
144 /**
145 * Gets the {@link OS} matching the given code.
146 *
147 * @param code an OS or {@link #UNKNOWN} for no match.
148 * @return a {@link OS}.
149 */
150 public static OS from(final int code) {
151 switch (code) {
152 case OS_ACORN_RISCOS:
153 return ACORN_RISCOS;
154 case OS_AMIGA:
155 return AMIGA;
156 case OS_ATARI_TOS:
157 return ATARI_TOS;
158 case OS_CPM:
159 return CPM;
160 case OS_FAT:
161 return FAT;
162 case OS_HPFS:
163 return HPFS;
164 case OS_MACINTOSH:
165 return MACINTOSH;
166 case OS_NTFS:
167 return NTFS;
168 case OS_QDOS:
169 return QDOS;
170 case OS_TOPS_20:
171 return TOPS_20;
172 case OS_UNIX:
173 return UNIX;
174 case OS_UNKNOWN:
175 return UNKNOWN;
176 case OS_VM_CMS:
177 return VM_CMS;
178 case OS_VMS:
179 return VMS;
180 case OS_Z_SYSTEM:
181 return Z_SYSTEM;
182 default:
183 return UNKNOWN;
184 }
185 }
186
187 private final int type;
188
189 /**
190 * Constructs a new instance.
191 *
192 * @param type the OS type.
193 */
194 OS(final int type) {
195 this.type = type;
196 }
197
198 /**
199 * Gets the OS type.
200 *
201 * @return the OS type.
202 */
203 public int type() {
204 return type;
205 }
206
207 }
208
209 private static final int BUFFER_SIZE = 512;
210
211 /**
212 * 13: Acorn RISCOS.
213 */
214 private static final int OS_ACORN_RISCOS = 13;
215
216 /**
217 * 1: Amiga.
218 */
219 private static final int OS_AMIGA = 1;
220
221 /**
222 * 5: Atari TOS.
223 */
224 private static final int OS_ATARI_TOS = 5;
225
226 /**
227 * 9: CP/M.
228 */
229 private static final int OS_CPM = 9;
230
231 /**
232 * 0: FAT.
233 */
234 private static final int OS_FAT = 0;
235
236 /**
237 * 6: HPFS filesystem (OS/2, NT).
238 */
239 private static final int OS_HPFS = 6;
240
241 /**
242 * 7: Macintosh.
243 */
244 private static final int OS_MACINTOSH = 7;
245
246 /**
247 * 11: NTFS filesystem (NT).
248 */
249 private static final int OS_NTFS = 11;
250
251 /**
252 * 12: QDOS.
253 */
254 private static final int OS_QDOS = 12;
255
256 /**
257 * 10: TOPS-20.
258 */
259 private static final int OS_TOPS_20 = 10;
260
261 /**
262 * 3: Unix.
263 */
264 private static final int OS_UNIX = 3;
265
266 /**
267 * 255: unknown.
268 */
269 private static final int OS_UNKNOWN = 255;
270
271 /**
272 * 4: VM/CMS.
273 */
274 private static final int OS_VM_CMS = 4;
275
276 /**
277 * 2: VMS (or OpenVMS).
278 */
279 private static final int OS_VMS = 2;
280
281 /**
282 * 8: Z-System.
283 */
284 private static final int OS_Z_SYSTEM = 8;
285
286 private int bufferSize = BUFFER_SIZE;
287
288 private String comment;
289 private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
290 private int deflateStrategy = Deflater.DEFAULT_STRATEGY;
291 private ExtraField extraField;
292 private String fileName;
293 private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
294 private boolean headerCrc;
295 /**
296 * The most recent modification time (MTIME) of the original file being compressed.
297 * <p>
298 * The time is in Unix format, for example, seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other systems that
299 * use local rather than Universal time.) If the compressed data did not come from a file, MTIME is set to the time at which compression started. MTIME = 0
300 * means no time stamp is available.
301 * </p>
302 */
303 private Instant modificationInstant = Instant.EPOCH;
304 private OS operatingSystem = OS.UNKNOWN; // Unknown OS by default
305 private long trailerCrc;
306 private long trailerISize;
307
308 @Override
309 public boolean equals(final Object obj) {
310 if (this == obj) {
311 return true;
312 }
313 if (!(obj instanceof GzipParameters)) {
314 return false;
315 }
316 final GzipParameters other = (GzipParameters) obj;
317 return bufferSize == other.bufferSize && Objects.equals(comment, other.comment) && compressionLevel == other.compressionLevel
318 && deflateStrategy == other.deflateStrategy && Objects.equals(extraField, other.extraField) && Objects.equals(fileName, other.fileName)
319 && Objects.equals(fileNameCharset, other.fileNameCharset) && headerCrc == other.headerCrc
320 && Objects.equals(modificationInstant, other.modificationInstant) && operatingSystem == other.operatingSystem && trailerCrc == other.trailerCrc
321 && trailerISize == other.trailerISize;
322 }
323
324 /**
325 * Gets size of the buffer used to retrieve compressed data.
326 *
327 * @return The size of the buffer used to retrieve compressed data.
328 * @see #setBufferSize(int)
329 * @since 1.21
330 */
331 public int getBufferSize() {
332 return this.bufferSize;
333 }
334
335 /**
336 * Gets an arbitrary user-defined comment.
337 *
338 * @return a user-defined comment.
339 */
340 public String getComment() {
341 return comment;
342 }
343
344 /**
345 * Gets the compression level.
346 *
347 * @return the compression level.
348 * @see Deflater#NO_COMPRESSION
349 * @see Deflater#BEST_SPEED
350 * @see Deflater#DEFAULT_COMPRESSION
351 * @see Deflater#BEST_COMPRESSION
352 */
353 public int getCompressionLevel() {
354 return compressionLevel;
355 }
356
357 /**
358 * Gets the deflater strategy.
359 *
360 * @return the deflater strategy, {@link Deflater#DEFAULT_STRATEGY} by default.
361 * @see #setDeflateStrategy(int)
362 * @see Deflater#setStrategy(int)
363 * @since 1.23
364 */
365 public int getDeflateStrategy() {
366 return deflateStrategy;
367 }
368
369 /**
370 * Gets the Extra subfields from the header.
371 *
372 * @return the extra subfields from the header.
373 * @since 1.28.0
374 */
375 public ExtraField getExtraField() {
376 return extraField;
377 }
378
379 /**
380 * Gets the file name.
381 *
382 * @return the file name.
383 * @deprecated Use {@link #getFileName()}.
384 */
385 @Deprecated
386 public String getFilename() {
387 return fileName;
388 }
389
390 /**
391 * Gets the file name.
392 *
393 * @return the file name.
394 * @since 1.25.0
395 */
396 public String getFileName() {
397 return fileName;
398 }
399
400 /**
401 * Gets the Charset to use for writing file names and comments.
402 * <p>
403 * The default value is {@link GzipUtils#GZIP_ENCODING}.
404 * </p>
405 *
406 * @return the Charset to use for writing file names and comments.
407 * @since 1.28.0
408 */
409 public Charset getFileNameCharset() {
410 return fileNameCharset;
411 }
412
413 /**
414 * Returns if the header CRC is to be added (when writing) or was present (when reading).
415 *
416 * @return true is header CRC will be added (on write) or was found (after read).
417 * @since 1.28.0
418 */
419 public boolean getHeaderCRC() {
420 return headerCrc;
421 }
422
423 /**
424 * Gets the most recent modification time (MTIME) of the original file being compressed.
425 *
426 * @return the most recent modification time.
427 * @since 1.28.0
428 */
429 public Instant getModificationInstant() {
430 return modificationInstant;
431 }
432
433 /**
434 * Gets the most recent modification time (MTIME) of the original file being compressed, in seconds since 00:00:00 GMT, Jan. 1, 1970.
435 * <p>
436 * The time is in Unix format, for example, seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other systems that
437 * use local rather than Universal time.) If the compressed data did not come from a file, MTIME is set to the time at which compression started. MTIME = 0
438 * means no time stamp is available.
439 * </p>
440 *
441 * @return the most recent modification time in seconds since 00:00:00 GMT, Jan. 1, 1970.
442 */
443 public long getModificationTime() {
444 return modificationInstant.getEpochSecond();
445 }
446
447 /**
448 * Gets the OS code type.
449 *
450 * @return the OS code type.
451 */
452 public int getOperatingSystem() {
453 return operatingSystem.type;
454 }
455
456 /**
457 * Gets the OS type.
458 *
459 * @return the OS type.
460 * @since 1.28.0
461 */
462 public OS getOS() {
463 return operatingSystem;
464 }
465
466 /**
467 * Gets the trailer CRC value.
468 *
469 * @return the trailer CRC value.
470 * @since 1.28.0
471 */
472 public long getTrailerCrc() {
473 return trailerCrc;
474 }
475
476 /**
477 * Gets the trailer ISIZE value.
478 *
479 * @return the trailer ISIZE value.
480 * @since 1.28.0
481 */
482 public long getTrailerISize() {
483 return trailerISize;
484 }
485
486 @Override
487 public int hashCode() {
488 return Objects.hash(bufferSize, comment, compressionLevel, deflateStrategy, extraField, fileName, fileNameCharset, headerCrc, modificationInstant,
489 operatingSystem, trailerCrc, trailerISize);
490 }
491
492 private String requireNonNulByte(final String text) {
493 if (StringUtils.isNotEmpty(text) && ArrayUtils.contains(text.getBytes(fileNameCharset), (byte) 0)) {
494 throw new IllegalArgumentException("String encoded in Charset '" + fileNameCharset + "' contains the nul byte 0 which is not supported in gzip.");
495 }
496 return text;
497 }
498
499 /**
500 * Sets size of the buffer used to retrieve compressed data from {@link Deflater} and write to underlying {@link OutputStream}.
501 *
502 * @param bufferSize the bufferSize to set. Must be a positive type.
503 * @since 1.21
504 */
505 public void setBufferSize(final int bufferSize) {
506 if (bufferSize <= 0) {
507 throw new IllegalArgumentException("invalid buffer size: " + bufferSize);
508 }
509 this.bufferSize = bufferSize;
510 }
511
512 /**
513 * Sets an arbitrary user-defined comment.
514 *
515 * @param comment a user-defined comment.
516 * @throws IllegalArgumentException if the encoded bytes would contain a nul byte '\0' reserved for gzip field termination.
517 */
518 public void setComment(final String comment) {
519 this.comment = requireNonNulByte(comment);
520 }
521
522 /**
523 * Sets the compression level.
524 *
525 * @param compressionLevel the compression level (between 0 and 9)
526 * @see Deflater#NO_COMPRESSION
527 * @see Deflater#BEST_SPEED
528 * @see Deflater#DEFAULT_COMPRESSION
529 * @see Deflater#BEST_COMPRESSION
530 */
531 public void setCompressionLevel(final int compressionLevel) {
532 if (compressionLevel < -1 || compressionLevel > 9) {
533 throw new IllegalArgumentException("Invalid gzip compression level: " + compressionLevel);
534 }
535 this.compressionLevel = compressionLevel;
536 }
537
538 /**
539 * Sets the deflater strategy.
540 *
541 * @param deflateStrategy the new compression strategy
542 * @see Deflater#setStrategy(int)
543 * @since 1.23
544 */
545 public void setDeflateStrategy(final int deflateStrategy) {
546 this.deflateStrategy = deflateStrategy;
547 }
548
549 /**
550 * Sets the extra subfields. Note that a non-null extra will appear in the gzip header regardless of the presence of subfields, while a null extra will not
551 * appear at all.
552 *
553 * @param extra the series of extra sub fields.
554 * @since 1.28.0
555 */
556 public void setExtraField(final ExtraField extra) {
557 this.extraField = extra;
558 }
559
560 /**
561 * Sets the name of the compressed file.
562 *
563 * @param fileName the name of the file without the directory path
564 * @throws IllegalArgumentException if the encoded bytes would contain a nul byte '\0' reserved for gzip field termination.
565 * @deprecated Use {@link #setFileName(String)}.
566 */
567 @Deprecated
568 public void setFilename(final String fileName) {
569 setFileName(fileName);
570 }
571
572 /**
573 * Sets the name of the compressed file.
574 *
575 * @param fileName the name of the file without the directory path
576 * @throws IllegalArgumentException if the encoded bytes would contain a nul byte '\0' reserved for gzip field termination.
577 */
578 public void setFileName(final String fileName) {
579 this.fileName = requireNonNulByte(fileName);
580 }
581
582 /**
583 * Sets the Charset to use for writing file names and comments, where null maps to {@link GzipUtils#GZIP_ENCODING}.
584 * <p>
585 * <em>Setting a value other than {@link GzipUtils#GZIP_ENCODING} is not compliant with the <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952
586 * GZIP File Format Specification</a></em>. Use at your own risk of interoperability issues.
587 * </p>
588 * <p>
589 * The default value is {@link GzipUtils#GZIP_ENCODING}.
590 * </p>
591 *
592 * @param charset the Charset to use for writing file names and comments, null maps to {@link GzipUtils#GZIP_ENCODING}.
593 * @since 1.28.0
594 */
595 public void setFileNameCharset(final Charset charset) {
596 this.fileNameCharset = Charsets.toCharset(charset, GzipUtils.GZIP_ENCODING);
597 }
598
599 /**
600 * Establishes the presence of the header flag FLG.FHCRC and its headers CRC16 value.
601 *
602 * @param headerCRC when true, the header CRC16 (actually low 16 buts of a CRC32) is calculated and inserted
603 * in the gzip header on write; on read it means the field was present.
604 * @since 1.28.0
605 */
606 public void setHeaderCRC(final boolean headerCRC) {
607 this.headerCrc = headerCRC;
608 }
609
610 /**
611 * Sets the modification time (MTIME) of the compressed file.
612 *
613 * @param modificationTime the modification time, in milliseconds
614 * @since 1.28.0
615 */
616 public void setModificationInstant(final Instant modificationTime) {
617 this.modificationInstant = modificationTime != null ? modificationTime : Instant.EPOCH;
618 }
619
620 /**
621 * Sets the modification time (MTIME) of the compressed file, in seconds since 00:00:00 GMT, Jan. 1, 1970.
622 * <p>
623 * The time is in Unix format, for example, seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other systems that
624 * use local rather than Universal time.) If the compressed data did not come from a file, MTIME is set to the time at which compression started. MTIME = 0
625 * means no time stamp is available.
626 * </p>
627 *
628 * @param modificationTimeSeconds the modification time, in seconds.
629 */
630 public void setModificationTime(final long modificationTimeSeconds) {
631 this.modificationInstant = Instant.ofEpochSecond(modificationTimeSeconds);
632 }
633
634 /**
635 * Sets the operating system on which the compression took place. The defined values are:
636 * <ul>
637 * <li>0: FAT file system (MS-DOS, OS/2, NT/Win32)</li>
638 * <li>1: Amiga</li>
639 * <li>2: VMS (or OpenVMS)</li>
640 * <li>3: Unix</li>
641 * <li>4: VM/CMS</li>
642 * <li>5: Atari TOS</li>
643 * <li>6: HPFS file system (OS/2, NT)</li>
644 * <li>7: Macintosh</li>
645 * <li>8: Z-System</li>
646 * <li>9: CP/M</li>
647 * <li>10: TOPS-20</li>
648 * <li>11: NTFS file system (NT)</li>
649 * <li>12: QDOS</li>
650 * <li>13: Acorn RISCOS</li>
651 * <li>255: Unknown</li>
652 * </ul>
653 *
654 * @param operatingSystem the code of the operating system
655 */
656 public void setOperatingSystem(final int operatingSystem) {
657 this.operatingSystem = OS.from(operatingSystem);
658 }
659
660 /**
661 * Sets the operating system on which the compression took place.
662 *
663 * @param os operating system, null maps to {@link OS#UNKNOWN}.
664 * @since 1.28.0
665 */
666 public void setOS(final OS os) {
667 this.operatingSystem = os != null ? os : OS.UNKNOWN;
668 }
669
670 void setTrailerCrc(final long trailerCrc) {
671 this.trailerCrc = trailerCrc;
672 }
673
674 void setTrailerISize(final long trailerISize) {
675 this.trailerISize = trailerISize;
676 }
677
678 @Override
679 public String toString() {
680 final StringBuilder builder = new StringBuilder();
681 builder.append("GzipParameters [bufferSize=").append(bufferSize).append(", comment=").append(comment).append(", compressionLevel=")
682 .append(compressionLevel).append(", deflateStrategy=").append(deflateStrategy).append(", extraField=").append(extraField).append(", fileName=")
683 .append(fileName).append(", fileNameCharset=").append(fileNameCharset).append(", headerCrc=").append(headerCrc).append(", modificationInstant=")
684 .append(modificationInstant).append(", operatingSystem=").append(operatingSystem).append(", trailerCrc=").append(trailerCrc)
685 .append(", trailerISize=").append(trailerISize).append("]");
686 return builder.toString();
687 }
688 }