1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.apache.commons.compress.archivers.tar;
26
27 import java.io.ByteArrayOutputStream;
28 import java.io.FileInputStream;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.HashMap;
34 import java.util.List;
35 import java.util.Map;
36
37 import org.apache.commons.compress.archivers.ArchiveEntry;
38 import org.apache.commons.compress.archivers.ArchiveInputStream;
39 import org.apache.commons.compress.archivers.zip.ZipEncoding;
40 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
41 import org.apache.commons.compress.utils.ArchiveUtils;
42 import org.apache.commons.compress.utils.IOUtils;
43 import org.apache.commons.io.input.BoundedInputStream;
44
45
46
47
48
49
50
51 public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> {
52
53
54
55
/**
 * Version field value that {@link #matches(byte[], int)} accepts together with
 * {@code TarConstants.MAGIC_POSIX}. NOTE(review): the name suggests archives written on AIX - confirm.
 */
private static final String VERSION_AIX = "0 ";

/** Length in bytes of the scratch buffer {@code smallBuf}. */
private static final int SMALL_BUFFER_SIZE = 256;
59
60
61
62
63
64
65
66
67 public static boolean matches(final byte[] signature, final int length) {
68 final int versionOffset = TarConstants.VERSION_OFFSET;
69 final int versionLen = TarConstants.VERSIONLEN;
70 if (length < versionOffset + versionLen) {
71 return false;
72 }
73 final int magicOffset = TarConstants.MAGIC_OFFSET;
74 final int magicLen = TarConstants.MAGICLEN;
75 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
76 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, signature, versionOffset, versionLen)) {
77 return true;
78 }
79
80 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
81 && ArchiveUtils.matchAsciiBuffer(VERSION_AIX, signature, versionOffset, versionLen)) {
82 return true;
83 }
84 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, signature, magicOffset, magicLen)
85 && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, signature, versionOffset, versionLen)
86 || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, signature, versionOffset, versionLen))) {
87 return true;
88 }
89
90 return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, signature, magicOffset, magicLen)
91 && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, signature, versionOffset, versionLen);
92 }
93
/** Scratch buffer used by {@code getLongNameData()} while draining the content of a long-name entry. */
private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

/** Reusable buffer filled by {@code readRecord()}; its length is the record size. */
private final byte[] recordBuffer;

/** Block size; used to skip the rest of the last block once the end-of-archive record was seen. */
private final int blockSize;

/** Whether the end of the archive has been reached. */
private boolean atEof;

/** Size of the current entry as reported by {@code TarArchiveEntry.getSize()}. */
private long entrySize;

/** Number of bytes of the current entry that have been read or skipped so far. */
private long entryOffset;

/** Streams (zero-filled gaps and bounded views of the archive) that make up a sparse entry, in order. */
private List<InputStream> sparseInputStreams;

/** Index into {@code sparseInputStreams} of the stream currently in use; -1 when the list is empty. */
private int currentSparseInputStreamIndex;

/** The entry currently being read, or {@code null} if there is none. */
private TarArchiveEntry currEntry;

/** Encoding used to decode long names and long link names, and passed on to new entries. */
private final ZipEncoding zipEncoding;

/** Global PAX headers; replaced each time a global PAX header entry is parsed. */
private Map<String, String> globalPaxHeaders = new HashMap<>();

/** Sparse headers collected while parsing global PAX headers. */
private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

/** Leniency flag forwarded to the {@code TarArchiveEntry} constructor when parsing headers. */
private final boolean lenient;
130
131
132
133
134
135
/**
 * Creates a stream using the default block size, the default record size, no explicit encoding
 * and non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 */
public TarArchiveInputStream(final InputStream inputStream) {
    this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
}
139
140
141
142
143
144
145
146
147
/**
 * Creates a stream using the default block size, the default record size and no explicit encoding.
 *
 * @param inputStream the stream to read the archive from
 * @param lenient     leniency flag that is forwarded to the {@code TarArchiveEntry} constructor
 */
public TarArchiveInputStream(final InputStream inputStream, final boolean lenient) {
    this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
}
151
152
153
154
155
156
157
/**
 * Creates a stream with the given block size, the default record size, no explicit encoding
 * and non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize) {
    this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, null, false);
}
161
162
163
164
165
166
167
168
/**
 * Creates a stream with the given block and record sizes, no explicit encoding
 * and non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize) {
    this(inputStream, blockSize, recordSize, null, false);
}
172
173
174
175
176
177
178
179
180
181
/**
 * Creates a stream with the given block size, record size and encoding, using non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use
 * @param encoding    name of the encoding to use for names; may be {@code null} (other constructors pass {@code null})
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding) {
    this(inputStream, blockSize, recordSize, encoding, false);
}
185
186
187
188
189
190
191
192
193
194
195
196
/**
 * Canonical constructor; every other constructor ends up here.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param recordSize  the record size to use; determines the length of the internal record buffer
 * @param encoding    name of the encoding to use for names; may be {@code null} (other constructors pass {@code null})
 * @param lenient     leniency flag that is forwarded to the {@code TarArchiveEntry} constructor
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding, final boolean lenient) {
    super(inputStream, encoding);
    // Plain value copies first; they cannot fail.
    this.lenient = lenient;
    this.blockSize = blockSize;
    // The two initialisations that can throw keep their original relative order.
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.recordBuffer = new byte[recordSize];
}
204
205
206
207
208
209
210
211
212
/**
 * Creates a stream with the given block size and encoding, the default record size
 * and non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 * @param blockSize   the block size to use
 * @param encoding    name of the encoding to use for names; may be {@code null} (other constructors pass {@code null})
 */
public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final String encoding) {
    this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
216
217
218
219
220
221
222
223
/**
 * Creates a stream with the given encoding, the default block and record sizes
 * and non-lenient header parsing.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding    name of the encoding to use for names; may be {@code null} (other constructors pass {@code null})
 */
public TarArchiveInputStream(final InputStream inputStream, final String encoding) {
    this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
227
/**
 * Applies PAX header values and sparse headers to the current entry.
 *
 * @param headers       the PAX headers to apply
 * @param sparseHeaders the sparse headers to set on the current entry
 * @throws IOException if updating the entry fails
 */
private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
    currEntry.updateEntryFromPaxHeaders(headers);
    currEntry.setSparseHeaders(sparseHeaders);
}
232
233
234
235
236
237
238
239
240
241 @Override
242 public int available() throws IOException {
243 if (isDirectory()) {
244 return 0;
245 }
246 final long available = currEntry.getRealSize() - entryOffset;
247 if (available > Integer.MAX_VALUE) {
248 return Integer.MAX_VALUE;
249 }
250 return (int) available;
251 }
252
253
254
255
256
257
258
259
260 private void buildSparseInputStreams() throws IOException {
261 currentSparseInputStreamIndex = -1;
262 sparseInputStreams = new ArrayList<>();
263
264 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
265
266
267 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
268
269 long offset = 0;
270 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
271 final long zeroBlockSize = sparseHeader.getOffset() - offset;
272 if (zeroBlockSize < 0) {
273
274 throw new IOException("Corrupted struct sparse detected");
275 }
276
277 if (zeroBlockSize > 0) {
278
279 sparseInputStreams.add(BoundedInputStream.builder()
280 .setInputStream(zeroInputStream)
281 .setMaxCount(sparseHeader.getOffset() - offset)
282 .get());
283
284 }
285
286 if (sparseHeader.getNumbytes() > 0) {
287
288 sparseInputStreams.add(BoundedInputStream.builder()
289 .setInputStream(in)
290 .setMaxCount(sparseHeader.getNumbytes())
291 .get());
292
293 }
294 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
295 }
296 if (!sparseInputStreams.isEmpty()) {
297 currentSparseInputStreamIndex = 0;
298 }
299 }
300
301
302
303
304
305
/**
 * Tells whether this stream can read the data of the given entry.
 *
 * @param archiveEntry the entry to test
 * @return {@code true} if and only if the entry is a {@code TarArchiveEntry}
 */
@Override
public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
    return archiveEntry instanceof TarArchiveEntry;
}
310
311
312
313
314
315
316 @Override
317 public void close() throws IOException {
318
319 if (sparseInputStreams != null) {
320 for (final InputStream inputStream : sparseInputStreams) {
321 inputStream.close();
322 }
323 }
324 in.close();
325 }
326
327
328
329
330
331 private void consumeRemainderOfLastBlock() throws IOException {
332 final long bytesReadOfLastBlock = getBytesRead() % blockSize;
333 if (bytesReadOfLastBlock > 0) {
334 count(IOUtils.skip(in, blockSize - bytesReadOfLastBlock));
335 }
336 }
337
338
339
340
341
342
343
344
345
346
347 private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
348 long actuallySkipped = skipped;
349 if (in instanceof FileInputStream) {
350 actuallySkipped = Math.min(skipped, available);
351 }
352 if (actuallySkipped != expected) {
353 throw new IOException("Truncated TAR archive");
354 }
355 return actuallySkipped;
356 }
357
358
359
360
361
362
/**
 * Gets the entry this stream is currently positioned at.
 *
 * @return the current entry, or {@code null} if there is none
 */
public TarArchiveEntry getCurrentEntry() {
    return currEntry;
}
366
367
368
369
370
371
372
373 protected byte[] getLongNameData() throws IOException {
374
375 final ByteArrayOutputStream longName = new ByteArrayOutputStream();
376 int length = 0;
377 while ((length = read(smallBuf)) >= 0) {
378 longName.write(smallBuf, 0, length);
379 }
380 getNextEntry();
381 if (currEntry == null) {
382
383
384 return null;
385 }
386 byte[] longNameData = longName.toByteArray();
387
388 length = longNameData.length;
389 while (length > 0 && longNameData[length - 1] == 0) {
390 --length;
391 }
392 if (length != longNameData.length) {
393 longNameData = Arrays.copyOf(longNameData, length);
394 }
395 return longNameData;
396 }
397
398
399
400
401
402
403
/**
 * Advances to the next entry of the archive by delegating to {@link #getNextTarEntry()}.
 *
 * @return the next entry, or {@code null} if there are no more entries
 * @throws IOException if reading the next entry fails
 */
@Override
public TarArchiveEntry getNextEntry() throws IOException {
    return getNextTarEntry();
}
408
409
410
411
412
413
414
415
416
417
418 @Deprecated
419 public TarArchiveEntry getNextTarEntry() throws IOException {
420 if (isAtEOF()) {
421 return null;
422 }
423 if (currEntry != null) {
424
425 IOUtils.skip(this, Long.MAX_VALUE);
426
427 skipRecordPadding();
428 }
429 final byte[] headerBuf = getRecord();
430 if (headerBuf == null) {
431
432 currEntry = null;
433 return null;
434 }
435 try {
436 currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
437 } catch (final IllegalArgumentException e) {
438 throw new IOException("Error detected parsing the header", e);
439 }
440 entryOffset = 0;
441 entrySize = currEntry.getSize();
442 if (currEntry.isGNULongLinkEntry()) {
443 final byte[] longLinkData = getLongNameData();
444 if (longLinkData == null) {
445
446
447 return null;
448 }
449 currEntry.setLinkName(zipEncoding.decode(longLinkData));
450 }
451 if (currEntry.isGNULongNameEntry()) {
452 final byte[] longNameData = getLongNameData();
453 if (longNameData == null) {
454
455
456 return null;
457 }
458
459 final String name = zipEncoding.decode(longNameData);
460 currEntry.setName(name);
461 if (currEntry.isDirectory() && !name.endsWith("/")) {
462 currEntry.setName(name + "/");
463 }
464 }
465 if (currEntry.isGlobalPaxHeader()) {
466 readGlobalPaxHeaders();
467 }
468 try {
469 if (currEntry.isPaxHeader()) {
470 paxHeaders();
471 } else if (!globalPaxHeaders.isEmpty()) {
472 applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
473 }
474 } catch (final NumberFormatException e) {
475 throw new IOException("Error detected parsing the pax header", e);
476 }
477 if (currEntry.isOldGNUSparse()) {
478 readOldGNUSparse();
479 }
480
481
482
483
484 entrySize = currEntry.getSize();
485 return currEntry;
486 }
487
488
489
490
491
492
493
494
495
496
497
498
499 private byte[] getRecord() throws IOException {
500 byte[] headerBuf = readRecord();
501 setAtEOF(isEOFRecord(headerBuf));
502 if (isAtEOF() && headerBuf != null) {
503 tryToConsumeSecondEOFRecord();
504 consumeRemainderOfLastBlock();
505 headerBuf = null;
506 }
507 return headerBuf;
508 }
509
510
511
512
513
514
/**
 * Gets the record size used by this stream.
 *
 * @return the length of the internal record buffer
 */
public int getRecordSize() {
    return recordBuffer.length;
}
518
519
520
521
522
523
/**
 * Tells whether the end of the archive has been reached.
 *
 * @return the current value of the end-of-archive flag
 */
protected final boolean isAtEOF() {
    return atEof;
}
527
528 private boolean isDirectory() {
529 return currEntry != null && currEntry.isDirectory();
530 }
531
532
533
534
535
536
537
538 protected boolean isEOFRecord(final byte[] record) {
539 return record == null || ArchiveUtils.isArrayZero(record, getRecordSize());
540 }
541
542
543
544
545
546
/**
 * Does nothing; {@link #markSupported()} returns {@code false} for this stream.
 *
 * @param markLimit ignored
 */
@Override
public synchronized void mark(final int markLimit) {
    // Intentionally empty: marking is not supported.
}
550
551
552
553
554
555
/**
 * Tells whether this stream supports {@code mark} and {@code reset}.
 *
 * @return always {@code false}
 */
@Override
public boolean markSupported() {
    return false;
}
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580 private void paxHeaders() throws IOException {
581 List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
582 final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);
583
584 if (headers.containsKey(TarGnuSparseKeys.MAP)) {
585 sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
586 }
587 getNextEntry();
588 if (currEntry == null) {
589 throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
590 }
591 applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
592
593 if (currEntry.isPaxGNU1XSparse()) {
594 sparseHeaders = TarUtils.parsePAX1XSparseHeaders(in, getRecordSize());
595 currEntry.setSparseHeaders(sparseHeaders);
596 }
597
598
599 buildSparseInputStreams();
600 }
601
602
603
604
605
606
607
608
609
610
611
612
613
614 @Override
615 public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
616 if (numToRead == 0) {
617 return 0;
618 }
619 int totalRead = 0;
620 if (isAtEOF() || isDirectory()) {
621 return -1;
622 }
623 if (currEntry == null) {
624 throw new IllegalStateException("No current tar entry");
625 }
626 if (entryOffset >= currEntry.getRealSize()) {
627 return -1;
628 }
629 numToRead = Math.min(numToRead, available());
630 if (currEntry.isSparse()) {
631
632 totalRead = readSparse(buf, offset, numToRead);
633 } else {
634 totalRead = in.read(buf, offset, numToRead);
635 }
636 if (totalRead == -1) {
637 if (numToRead > 0) {
638 throw new IOException("Truncated TAR archive");
639 }
640 setAtEOF(true);
641 } else {
642 count(totalRead);
643 entryOffset += totalRead;
644 }
645 return totalRead;
646 }
647
/**
 * Parses a global PAX header entry, replaces {@code globalPaxHeaders} with the result
 * and advances to the next entry.
 *
 * @throws IOException if parsing fails or there is no current entry after advancing
 */
private void readGlobalPaxHeaders() throws IOException {
    globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
    getNextEntry();
    // The check is on currEntry rather than on the value returned by getNextEntry().
    if (currEntry == null) {
        throw new IOException("Error detected parsing the pax header");
    }
}
655
656
657
658
659
660
661 private void readOldGNUSparse() throws IOException {
662 if (currEntry.isExtended()) {
663 TarArchiveSparseEntry entry;
664 do {
665 final byte[] headerBuf = getRecord();
666 if (headerBuf == null) {
667 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
668 }
669 entry = new TarArchiveSparseEntry(headerBuf);
670 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
671 } while (entry.isExtended());
672 }
673
674
675 buildSparseInputStreams();
676 }
677
678
679
680
681
682
683
684 protected byte[] readRecord() throws IOException {
685 final int readCount = IOUtils.readFully(in, recordBuffer);
686 count(readCount);
687 if (readCount != getRecordSize()) {
688 return null;
689 }
690 return recordBuffer;
691 }
692
693
694
695
696
697
698
699
700
701
702
703
704
705 private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
706
707 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
708 return in.read(buf, offset, numToRead);
709 }
710 if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
711 return -1;
712 }
713 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
714 final int readLen = currentInputStream.read(buf, offset, numToRead);
715
716
717 if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
718 return readLen;
719 }
720
721 if (readLen == -1) {
722 currentSparseInputStreamIndex++;
723 return readSparse(buf, offset, numToRead);
724 }
725
726
727 if (readLen < numToRead) {
728 currentSparseInputStreamIndex++;
729 final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
730 if (readLenOfNext == -1) {
731 return readLen;
732 }
733 return readLen + readLenOfNext;
734 }
735
736 return readLen;
737 }
738
739
740
741
/**
 * Does nothing; {@link #markSupported()} returns {@code false} for this stream.
 */
@Override
public synchronized void reset() {
    // Intentionally empty: marking is not supported.
}
746
747
748
749
750
751
/**
 * Sets the end-of-archive flag.
 *
 * @param atEof the new value of the flag
 */
protected final void setAtEOF(final boolean atEof) {
    this.atEof = atEof;
}
755
756
757
758
759
760
/**
 * Replaces the current entry.
 *
 * @param currEntry the entry to make current; may be {@code null}
 */
protected final void setCurrentEntry(final TarArchiveEntry currEntry) {
    this.currEntry = currEntry;
}
764
765
766
767
768
769
770
771
772
773
774 @Override
775 public long skip(final long n) throws IOException {
776 if (n <= 0 || isDirectory()) {
777 return 0;
778 }
779 final long availableOfInputStream = in.available();
780 final long available = currEntry.getRealSize() - entryOffset;
781 final long numToSkip = Math.min(n, available);
782 long skipped;
783 if (!currEntry.isSparse()) {
784 skipped = IOUtils.skip(in, numToSkip);
785
786
787 skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
788 } else {
789 skipped = skipSparse(numToSkip);
790 }
791 count(skipped);
792 entryOffset += skipped;
793 return skipped;
794 }
795
796
797
798
799
800
801 private void skipRecordPadding() throws IOException {
802 if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
803 final long available = in.available();
804 final long numRecords = this.entrySize / getRecordSize() + 1;
805 final long padding = numRecords * getRecordSize() - this.entrySize;
806 long skipped = IOUtils.skip(in, padding);
807 skipped = getActuallySkipped(available, skipped, padding);
808 count(skipped);
809 }
810 }
811
812
813
814
815
816
817
818
819
820 private long skipSparse(final long n) throws IOException {
821 if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
822 return in.skip(n);
823 }
824 long bytesSkipped = 0;
825 while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
826 final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
827 bytesSkipped += currentInputStream.skip(n - bytesSkipped);
828 if (bytesSkipped < n) {
829 currentSparseInputStreamIndex++;
830 }
831 }
832 return bytesSkipped;
833 }
834
835
836
837
838
839
840
841
842
843 private void tryToConsumeSecondEOFRecord() throws IOException {
844 boolean shouldReset = true;
845 final boolean marked = in.markSupported();
846 if (marked) {
847 in.mark(getRecordSize());
848 }
849 try {
850 shouldReset = !isEOFRecord(readRecord());
851 } finally {
852 if (shouldReset && marked) {
853 pushedBackBytes(getRecordSize());
854 in.reset();
855 }
856 }
857 }
858 }