1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.gzip;
20
21 import java.io.BufferedInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.charset.Charset;
29 import java.util.zip.CRC32;
30 import java.util.zip.DataFormatException;
31 import java.util.zip.Deflater;
32 import java.util.zip.Inflater;
33
34 import org.apache.commons.compress.compressors.CompressorInputStream;
35 import org.apache.commons.compress.utils.ByteUtils;
36 import org.apache.commons.compress.utils.InputStreamStatistics;
37 import org.apache.commons.io.IOUtils;
38 import org.apache.commons.io.build.AbstractOrigin;
39 import org.apache.commons.io.build.AbstractStreamBuilder;
40 import org.apache.commons.io.function.IOConsumer;
41 import org.apache.commons.io.input.BoundedInputStream;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73 public class GzipCompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93 public static class Builder extends AbstractStreamBuilder<GzipCompressorInputStream, Builder> {
94
95
96 private boolean decompressConcatenated;
97
98 private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
99
100 private IOConsumer<GzipCompressorInputStream> onMemberStart;
101
102 private IOConsumer<GzipCompressorInputStream> onMemberEnd;
103
104
105
106
107 public Builder() {
108
109 }
110
111
112
113
114
115
116
117
118
119
120
121
122 @Override
123 public GzipCompressorInputStream get() throws IOException {
124 return new GzipCompressorInputStream(this);
125 }
126
127
128
129
130
131
132
133 public Builder setDecompressConcatenated(final boolean decompressConcatenated) {
134 this.decompressConcatenated = decompressConcatenated;
135 return this;
136 }
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 public Builder setFileNameCharset(final Charset fileNameCharset) {
152 this.fileNameCharset = fileNameCharset;
153 return this;
154 }
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169 public Builder setOnMemberEnd(final IOConsumer<GzipCompressorInputStream> onMemberEnd) {
170 this.onMemberEnd = onMemberEnd;
171 return this;
172 }
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187 public Builder setOnMemberStart(final IOConsumer<GzipCompressorInputStream> onMemberStart) {
188 this.onMemberStart = onMemberStart;
189 return this;
190 }
191 }
192
193 private static final IOConsumer<GzipCompressorInputStream> NOOP = IOConsumer.noop();
194
195
196
197
198
199
200
201 public static Builder builder() {
202 return new Builder();
203 }
204
205
206
207
208
209
210
211
212
213 public static boolean matches(final byte[] signature, final int length) {
214 return length >= 2 && signature[0] == 31 && signature[1] == -117;
215 }
216
217 private static byte[] readToNull(final DataInput inData) throws IOException {
218 try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
219 int b;
220 while ((b = inData.readUnsignedByte()) != 0) {
221 bos.write(b);
222 }
223 return bos.toByteArray();
224 }
225 }
226
227
/** Buffer for compressed bytes read from {@link #in} and handed to the inflater. */
private final byte[] buf = new byte[8192];

/** Number of bytes placed in {@link #buf} by the most recent fill; set back to 0 once a member's trailer is being processed. */
private int bufUsed;

/** Wrapper around the builder's input stream whose count is reported by {@link #getCompressedCount()}. */
private final BoundedInputStream countingStream;

/** CRC32 of the uncompressed bytes of the current member; reset at each member start and compared with the trailer. */
private final CRC32 crc = new CRC32();

/** If {@code true}, decompression continues with the next member after one ends instead of stopping. */
private final boolean decompressConcatenated;

/** Set once the last member to be decompressed has ended; further reads return -1. */
private boolean endReached;

/** Charset handed to each member's {@link GzipParameters} for decoding the file name and comment. */
private final Charset fileNameCharset;

/**
 * The stream all compressed data is read from: {@link #countingStream} itself when it supports mark/reset, otherwise a {@link BufferedInputStream} around it.
 * Mark/reset is used to rewind input that was read past the end of a member.
 */
private final InputStream in;

/** Inflater created with {@code nowrap == true}; set to {@code null} once it has been ended. */
private Inflater inflater = new Inflater(true);

/** Scratch buffer backing the single-byte {@link #read()}. */
private final byte[] oneByte = new byte[1];

/** Metadata of the member whose header was read most recently; replaced each time a new header is parsed. */
private GzipParameters parameters;

/** Callback run after each member header has been parsed; never {@code null}. */
private final IOConsumer<GzipCompressorInputStream> onMemberStart;

/** Callback run after each member trailer has been verified; never {@code null}. */
private final IOConsumer<GzipCompressorInputStream> onMemberEnd;
262
263 @SuppressWarnings("resource")
264 private GzipCompressorInputStream(final Builder builder) throws IOException {
265 countingStream = BoundedInputStream.builder().setInputStream(builder.getInputStream()).get();
266
267
268 in = countingStream.markSupported() ? countingStream : new BufferedInputStream(countingStream);
269 this.decompressConcatenated = builder.decompressConcatenated;
270 this.fileNameCharset = builder.fileNameCharset;
271 this.onMemberStart = builder.onMemberStart != null ? builder.onMemberStart : NOOP;
272 this.onMemberEnd = builder.onMemberEnd != null ? builder.onMemberEnd : NOOP;
273 init(true);
274 }
275
276
277
278
279
280
281
282
283
284
/**
 * Constructs a new input stream that decompresses gzip-compressed data from the given stream, using the builder's default settings.
 *
 * <p>
 * With those defaults only the first member is decompressed.
 * </p>
 *
 * @param inputStream the stream to read compressed data from.
 * @throws IOException if the first member header cannot be read.
 */
public GzipCompressorInputStream(final InputStream inputStream) throws IOException {
    this(new Builder().setInputStream(inputStream));
}
288
289
290
291
292
293
294
295
296
297
298
299
300
301
/**
 * Constructs a new input stream that decompresses gzip-compressed data from the given stream.
 *
 * @param inputStream            the stream to read compressed data from.
 * @param decompressConcatenated if {@code true}, decompress until the end of the input; if {@code false}, stop after the first member.
 * @throws IOException if the first member header cannot be read.
 * @deprecated Use {@link #builder()} with {@link Builder#setDecompressConcatenated(boolean)} and {@link Builder#get()}.
 */
@Deprecated
public GzipCompressorInputStream(final InputStream inputStream, final boolean decompressConcatenated) throws IOException {
    this(new Builder().setInputStream(inputStream).setDecompressConcatenated(decompressConcatenated));
}
306
307
308
309
310
311
312 @Override
313 public void close() throws IOException {
314 if (inflater != null) {
315 inflater.end();
316 inflater = null;
317 }
318 if (this.in != System.in) {
319 this.in.close();
320 }
321 }
322
323
324
325
326
327
328 @Override
329 public long getCompressedCount() {
330 return countingStream.getCount();
331 }
332
333
334
335
336
337
338
339 public GzipParameters getMetaData() {
340 return parameters;
341 }
342
343 private boolean init(final boolean isFirstMember) throws IOException {
344 if (!isFirstMember && !decompressConcatenated) {
345 throw new IllegalStateException("Unexpected: isFirstMember and decompressConcatenated are both false.");
346 }
347
348 final int magic0 = in.read();
349
350
351 if (magic0 == -1 && !isFirstMember) {
352 return false;
353 }
354 if (magic0 != GzipUtils.ID1 || in.read() != GzipUtils.ID2) {
355 throw new IOException(isFirstMember ? "Input is not in the .gz format." : "Unexpected data after a valid .gz stream.");
356 }
357 parameters = new GzipParameters();
358 parameters.setFileNameCharset(fileNameCharset);
359
360 final DataInput inData = new DataInputStream(in);
361 final int method = inData.readUnsignedByte();
362 if (method != Deflater.DEFLATED) {
363 throw new IOException("Unsupported compression method " + method + " in the .gz header");
364 }
365 final int flg = inData.readUnsignedByte();
366 if ((flg & GzipUtils.FRESERVED) != 0) {
367 throw new IOException("Reserved flags are set in the .gz header.");
368 }
369 parameters.setModificationTime(ByteUtils.fromLittleEndian(inData, 4));
370 switch (inData.readUnsignedByte()) {
371 case GzipUtils.XFL_MAX_COMPRESSION:
372 parameters.setCompressionLevel(Deflater.BEST_COMPRESSION);
373 break;
374 case GzipUtils.XFL_MAX_SPEED:
375 parameters.setCompressionLevel(Deflater.BEST_SPEED);
376 break;
377 default:
378 parameters.setCompressionLevel(Deflater.DEFAULT_COMPRESSION);
379 break;
380 }
381 parameters.setOperatingSystem(inData.readUnsignedByte());
382
383 if ((flg & GzipUtils.FEXTRA) != 0) {
384 int xlen = inData.readUnsignedByte();
385 xlen |= inData.readUnsignedByte() << 8;
386 final byte[] extra = new byte[xlen];
387 inData.readFully(extra);
388 parameters.setExtraField(ExtraField.fromBytes(extra));
389 }
390
391 if ((flg & GzipUtils.FNAME) != 0) {
392 parameters.setFileName(new String(readToNull(inData), parameters.getFileNameCharset()));
393 }
394
395 if ((flg & GzipUtils.FCOMMENT) != 0) {
396 parameters.setComment(new String(readToNull(inData), parameters.getFileNameCharset()));
397 }
398
399
400
401
402
403 if ((flg & GzipUtils.FHCRC) != 0) {
404 parameters.setHeaderCRC(true);
405 inData.readShort();
406 }
407
408 inflater.reset();
409 crc.reset();
410 onMemberStart.accept(this);
411 return true;
412 }
413
414 @Override
415 public int read() throws IOException {
416 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
417 }
418
419
420
421
422
423
424 @Override
425 public int read(final byte[] b, int off, int len) throws IOException {
426 if (len == 0) {
427 return 0;
428 }
429 if (endReached) {
430 return -1;
431 }
432
433 int size = 0;
434
435 while (len > 0) {
436 if (inflater.needsInput()) {
437
438
439 in.mark(buf.length);
440
441 bufUsed = in.read(buf);
442 if (bufUsed == -1) {
443 throw new EOFException();
444 }
445
446 inflater.setInput(buf, 0, bufUsed);
447 }
448
449 final int ret;
450 try {
451 ret = inflater.inflate(b, off, len);
452 } catch (final DataFormatException e) {
453 throw new IOException("Gzip-compressed data is corrupt.", e);
454 }
455
456 crc.update(b, off, ret);
457 off += ret;
458 len -= ret;
459 size += ret;
460 count(ret);
461
462 if (inflater.finished()) {
463
464
465 in.reset();
466 final int skipAmount = bufUsed - inflater.getRemaining();
467 if (IOUtils.skip(in, skipAmount) != skipAmount) {
468 throw new IOException();
469 }
470 bufUsed = 0;
471 final DataInput inData = new DataInputStream(in);
472
473 final long trailerCrc = ByteUtils.fromLittleEndian(inData, 4);
474 if (trailerCrc != crc.getValue()) {
475 throw new IOException("Gzip-compressed data is corrupt (CRC32 error).");
476 }
477
478 final long iSize = ByteUtils.fromLittleEndian(inData, 4);
479 if (iSize != (inflater.getBytesWritten() & 0xffffffffL)) {
480 throw new IOException("Gzip-compressed data is corrupt (uncompressed size mismatch).");
481 }
482 parameters.setTrailerCrc(trailerCrc);
483 parameters.setTrailerISize(iSize);
484 onMemberEnd.accept(this);
485
486 if (!decompressConcatenated || !init(false)) {
487 inflater.end();
488 inflater = null;
489 endReached = true;
490 return size == 0 ? -1 : size;
491 }
492 }
493 }
494
495 return size;
496 }
497 }