1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.archivers.tar;
20
21 import java.io.ByteArrayOutputStream;
22 import java.io.Closeable;
23 import java.io.File;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.nio.ByteBuffer;
27 import java.nio.channels.SeekableByteChannel;
28 import java.nio.file.Files;
29 import java.nio.file.Path;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.HashMap;
33 import java.util.LinkedList;
34 import java.util.List;
35 import java.util.Map;
36
37 import org.apache.commons.compress.archivers.zip.ZipEncoding;
38 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
39 import org.apache.commons.compress.utils.ArchiveUtils;
40 import org.apache.commons.compress.utils.BoundedArchiveInputStream;
41 import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
42 import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
43 import org.apache.commons.io.input.BoundedInputStream;
44
45
46
47
48
49
50 public class TarFile implements Closeable {
51
52 private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {
53
54 private final SeekableByteChannel channel;
55
56 private final TarArchiveEntry entry;
57
58 private long entryOffset;
59
60 private int currentSparseInputStreamIndex;
61
62 BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
63 super(entry.getDataOffset(), entry.getRealSize());
64 if (channel.size() - entry.getSize() < entry.getDataOffset()) {
65 throw new IOException("entry size exceeds archive size");
66 }
67 this.entry = entry;
68 this.channel = channel;
69 }
70
71 @Override
72 protected int read(final long pos, final ByteBuffer buf) throws IOException {
73 if (entryOffset >= entry.getRealSize()) {
74 return -1;
75 }
76
77 final int totalRead;
78 if (entry.isSparse()) {
79 totalRead = readSparse(entryOffset, buf, buf.limit());
80 } else {
81 totalRead = readArchive(pos, buf);
82 }
83
84 if (totalRead == -1) {
85 if (buf.array().length > 0) {
86 throw new IOException("Truncated TAR archive");
87 }
88 setAtEOF(true);
89 } else {
90 entryOffset += totalRead;
91 buf.flip();
92 }
93 return totalRead;
94 }
95
96 private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
97 channel.position(pos);
98 return channel.read(buf);
99 }
100
101 private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
102
103 final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
104 if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
105 return readArchive(entry.getDataOffset() + pos, buf);
106 }
107
108 if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
109 return -1;
110 }
111
112 final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
113 final byte[] bufArray = new byte[numToRead];
114 final int readLen = currentInputStream.read(bufArray);
115 if (readLen != -1) {
116 buf.put(bufArray, 0, readLen);
117 }
118
119
120
121 if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
122 return readLen;
123 }
124
125
126 if (readLen == -1) {
127 currentSparseInputStreamIndex++;
128 return readSparse(pos, buf, numToRead);
129 }
130
131
132
133 if (readLen < numToRead) {
134 currentSparseInputStreamIndex++;
135 final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
136 if (readLenOfNext == -1) {
137 return readLen;
138 }
139
140 return readLen + readLenOfNext;
141 }
142
143
144 return readLen;
145 }
146 }
147
/** Size in bytes of the scratch buffer {@code smallBuf}. */
private static final int SMALL_BUFFER_SIZE = 256;

/** Scratch buffer reused by {@code getLongNameData()} while copying an entry's data. */
private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

/** Channel the archive is read from; closed by {@code close()}. */
private final SeekableByteChannel archive;

/**
 * Encoding obtained from the constructor's {@code encoding} argument. It is passed to every parsed header and used to decode GNU long
 * names and long link names.
 */
private final ZipEncoding zipEncoding;

/** Entries in the order they were read; filled once by the constructor. */
private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

/** Block size; the remainder of the last block is skipped once the EOF record has been seen. */
private final int blockSize;

/** Leniency flag forwarded to the {@code TarArchiveEntry} constructor when a header is parsed. */
private final boolean lenient;

/** Size in bytes of one record, i.e. of one header and of the padding unit of entry data. */
private final int recordSize;

/** Buffer of {@code recordSize} bytes that is reused for every record read. */
private final ByteBuffer recordBuffer;

/** Sparse headers collected while parsing global PAX headers. */
private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

/** End-of-archive flag; once set, {@code getNextTarEntry()} returns {@code null}. */
private boolean eof;

/** Entry currently being processed while the archive is scanned, or {@code null} before the first and after the last entry. */
private TarArchiveEntry currEntry;

/** Most recently parsed global PAX headers; applied to entries that have no PAX header of their own. */
private Map<String, String> globalPaxHeaders = new HashMap<>();

/**
 * Segment streams of sparse entries, keyed by entry name. Because the key is the name, a later sparse entry with the same name replaces
 * the list stored for an earlier one.
 */
private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
183
184
185
186
187
188
189
/**
 * Creates a TarFile over an archive held in memory, using the default block size, the default record size, a {@code null} encoding and
 * non-lenient parsing.
 *
 * @param content the content of the archive
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content) throws IOException {
    this(new SeekableInMemoryByteChannel(content));
}
193
194
195
196
197
198
199
200
201
/**
 * Creates a TarFile over an archive held in memory, using the default block size, the default record size and a {@code null} encoding.
 *
 * @param content the content of the archive
 * @param lenient forwarded to the header parser of each entry; presumably makes it tolerate illegal header values — see
 *                {@code TarArchiveEntry} to confirm
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content, final boolean lenient) throws IOException {
    this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
}
205
206
207
208
209
210
211
212
/**
 * Creates a TarFile over an archive held in memory, using the default block size, the default record size and non-lenient parsing.
 *
 * @param content  the content of the archive
 * @param encoding name of the encoding to use for entry names; passed to {@code ZipEncodingHelper.getZipEncoding}
 * @throws IOException if reading the archive fails
 */
public TarFile(final byte[] content, final String encoding) throws IOException {
    this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
216
217
218
219
220
221
222
/**
 * Creates a TarFile for the given archive file by delegating to the {@code Path} based constructor.
 *
 * @param archive the archive file
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive) throws IOException {
    this(archive.toPath());
}
226
227
228
229
230
231
232
233
234
/**
 * Creates a TarFile for the given archive file by delegating to the {@code Path} based constructor.
 *
 * @param archive the archive file
 * @param lenient forwarded to the header parser of each entry; presumably makes it tolerate illegal header values — see
 *                {@code TarArchiveEntry} to confirm
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive, final boolean lenient) throws IOException {
    this(archive.toPath(), lenient);
}
238
239
240
241
242
243
244
245
/**
 * Creates a TarFile for the given archive file by delegating to the {@code Path} based constructor.
 *
 * @param archive  the archive file
 * @param encoding name of the encoding to use for entry names
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final File archive, final String encoding) throws IOException {
    this(archive.toPath(), encoding);
}
249
250
251
252
253
254
255
/**
 * Opens a byte channel on the given path and creates a TarFile over it, using the default block size, the default record size, a
 * {@code null} encoding and non-lenient parsing.
 *
 * <p>
 * NOTE(review): the channel is opened here, and nothing in this class closes it when the delegated constructor throws while reading the
 * entries — confirm whether callers can leak a file handle on corrupt archives.
 * </p>
 *
 * @param archivePath the path of the archive
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath) throws IOException {
    this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
}
259
260
261
262
263
264
265
266
267
/**
 * Opens a byte channel on the given path and creates a TarFile over it, using the default block size, the default record size and a
 * {@code null} encoding.
 *
 * <p>
 * NOTE(review): the channel is opened here, and nothing in this class closes it when the delegated constructor throws while reading the
 * entries — confirm whether callers can leak a file handle on corrupt archives.
 * </p>
 *
 * @param archivePath the path of the archive
 * @param lenient     forwarded to the header parser of each entry; presumably makes it tolerate illegal header values — see
 *                    {@code TarArchiveEntry} to confirm
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath, final boolean lenient) throws IOException {
    this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
}
271
272
273
274
275
276
277
278
/**
 * Opens a byte channel on the given path and creates a TarFile over it, using the default block size, the default record size and
 * non-lenient parsing.
 *
 * <p>
 * NOTE(review): the channel is opened here, and nothing in this class closes it when the delegated constructor throws while reading the
 * entries — confirm whether callers can leak a file handle on corrupt archives.
 * </p>
 *
 * @param archivePath the path of the archive
 * @param encoding    name of the encoding to use for entry names
 * @throws IOException if opening or reading the archive fails
 */
public TarFile(final Path archivePath, final String encoding) throws IOException {
    this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
}
282
283
284
285
286
287
288
/**
 * Creates a TarFile over the given channel, using the default block size, the default record size, a {@code null} encoding and
 * non-lenient parsing.
 *
 * @param content the channel holding the archive
 * @throws IOException if reading the archive fails
 */
public TarFile(final SeekableByteChannel content) throws IOException {
    this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
}
292
293
294
295
296
297
298
299
300
301
302
303
/**
 * Creates a TarFile over the given channel and immediately scans the whole archive, collecting every entry.
 *
 * <p>
 * The channel is kept open for later calls to {@code getInputStream} and is closed by {@code close()}. It is not closed here if scanning
 * fails.
 * </p>
 *
 * @param archive    the channel holding the archive
 * @param blockSize  the block size to use
 * @param recordSize the record size to use
 * @param encoding   name of the encoding to use for entry names; passed to {@code ZipEncodingHelper.getZipEncoding}
 * @param lenient    forwarded to the header parser of each entry
 * @throws IOException if reading the archive fails
 */
public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
        throws IOException {
    this.lenient = lenient;
    this.blockSize = blockSize;
    this.archive = archive;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.recordSize = recordSize;
    this.recordBuffer = ByteBuffer.allocate(recordSize);

    // Walk the archive front to back; getNextTarEntry() yields null once the end has been reached.
    for (TarArchiveEntry next = getNextTarEntry(); next != null; next = getNextTarEntry()) {
        entries.add(next);
    }
}
318
319
320
321
322
323
324
/**
 * Updates the current entry from the given PAX headers and replaces its sparse headers with the given list.
 *
 * @param headers       PAX key/value pairs to apply to the current entry
 * @param sparseHeaders sparse headers to set on the current entry
 * @throws IOException if the entry rejects the headers
 */
private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
    currEntry.updateEntryFromPaxHeaders(headers);
    currEntry.setSparseHeaders(sparseHeaders);
}
329
330
331
332
333
334
335
336
337 private void buildSparseInputStreams() throws IOException {
338 final List<InputStream> streams = new ArrayList<>();
339 final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
340
341 final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
342
343 long offset = 0;
344 long numberOfZeroBytesInSparseEntry = 0;
345 for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
346 final long zeroBlockSize = sparseHeader.getOffset() - offset;
347 if (zeroBlockSize < 0) {
348
349 throw new IOException("Corrupted struct sparse detected");
350 }
351
352 if (zeroBlockSize > 0) {
353 streams.add(BoundedInputStream.builder().setInputStream(zeroInputStream).setMaxCount(zeroBlockSize).get());
354 numberOfZeroBytesInSparseEntry += zeroBlockSize;
355 }
356
357 if (sparseHeader.getNumbytes() > 0) {
358 final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
359 if (start + sparseHeader.getNumbytes() < start) {
360
361 throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
362 }
363 streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
364 }
365 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
366 }
367 sparseInputStreams.put(currEntry.getName(), streams);
368 }
369
/**
 * Closes the underlying archive channel.
 *
 * @throws IOException if closing the channel fails
 */
@Override
public void close() throws IOException {
    archive.close();
}
374
375
376
377
378
379 private void consumeRemainderOfLastBlock() throws IOException {
380 final long bytesReadOfLastBlock = archive.position() % blockSize;
381 if (bytesReadOfLastBlock > 0) {
382 repositionForwardBy(blockSize - bytesReadOfLastBlock);
383 }
384 }
385
386
387
388
389
390
/**
 * Returns the entries of the archive in the order they were read.
 *
 * @return a new list holding all entries; changes to the returned list do not affect this TarFile
 */
public List<TarArchiveEntry> getEntries() {
    return new ArrayList<>(entries);
}
394
395
396
397
398
399
400
401
/**
 * Returns an input stream over the content of the given entry.
 *
 * <p>
 * NOTE(review): for sparse entries the stream draws from segment streams that were created once while the archive was scanned and are
 * looked up by entry name. It therefore looks as if the content of a sparse entry can only be consumed once, and as if sparse entries
 * sharing a name share segments — confirm.
 * </p>
 *
 * @param entry the entry to read
 * @return a stream over the entry's content
 * @throws IOException if the entry's size exceeds the archive size, or if creating the stream fails with a runtime exception, which is
 *                     wrapped
 */
public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
    try {
        return new BoundedTarEntryInputStream(entry, archive);
    } catch (final RuntimeException ex) {
        // Surface unexpected failures as a checked exception so callers only have to deal with IOException.
        throw new IOException("Corrupted TAR archive. Can't read entry", ex);
    }
}
409
410
411
412
413
414
415
416 private byte[] getLongNameData() throws IOException {
417 final ByteArrayOutputStream longName = new ByteArrayOutputStream();
418 int length;
419 try (InputStream in = getInputStream(currEntry)) {
420 while ((length = in.read(smallBuf)) >= 0) {
421 longName.write(smallBuf, 0, length);
422 }
423 }
424 getNextTarEntry();
425 if (currEntry == null) {
426
427
428 return null;
429 }
430 byte[] longNameData = longName.toByteArray();
431
432 length = longNameData.length;
433 while (length > 0 && longNameData[length - 1] == 0) {
434 --length;
435 }
436 if (length != longNameData.length) {
437 longNameData = Arrays.copyOf(longNameData, length);
438 }
439 return longNameData;
440 }
441
442
443
444
445
446
447
448
449
/**
 * Advances to the next entry of the archive and makes it the current entry.
 *
 * <p>
 * Skips whatever is left of the previous entry's data and padding, reads the next header record and then resolves the special entry
 * kinds: GNU long link names, GNU long names, global PAX headers, PAX headers and old GNU sparse headers. Most of these helpers call this
 * method again to step to the entry the special entry describes, so on return the current entry is the first regular entry found.
 * </p>
 *
 * @return the next entry, or {@code null} if the end of the archive has been reached
 * @throws IOException if the archive is truncated or a header cannot be parsed
 */
private TarArchiveEntry getNextTarEntry() throws IOException {
    if (isAtEOF()) {
        return null;
    }

    if (currEntry != null) {
        // Jump over the data of the previous entry and the padding that fills up its last record.
        repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
        throwExceptionIfPositionIsNotInArchive();
        skipRecordPadding();
    }

    final ByteBuffer headerBuf = getRecord();
    if (null == headerBuf) {
        // End of archive: EOF record seen or no complete record left.
        currEntry = null;
        return null;
    }

    try {
        // The header record has just been consumed, so the channel now sits right behind it.
        final long position = archive.position();
        currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
    } catch (final IllegalArgumentException e) {
        throw new IOException("Error detected parsing the header", e);
    }

    if (currEntry.isGNULongLinkEntry()) {
        // getLongNameData() also advances currEntry to the entry following the long link entry.
        final byte[] longLinkData = getLongNameData();
        if (longLinkData == null) {
            // No entry followed the long link entry; treat this as the end of the archive.
            return null;
        }
        currEntry.setLinkName(zipEncoding.decode(longLinkData));
    }

    if (currEntry.isGNULongNameEntry()) {
        // getLongNameData() also advances currEntry to the entry following the long name entry.
        final byte[] longNameData = getLongNameData();
        if (longNameData == null) {
            // No entry followed the long name entry; treat this as the end of the archive.
            return null;
        }

        final String name = zipEncoding.decode(longNameData);
        currEntry.setName(name);
        // Make sure directory names end with a slash.
        if (currEntry.isDirectory() && !name.endsWith("/")) {
            currEntry.setName(name + "/");
        }
    }

    if (currEntry.isGlobalPaxHeader()) {
        // Stores the global headers and advances currEntry to the following entry.
        readGlobalPaxHeaders();
    }

    try {
        if (currEntry.isPaxHeader()) {
            // Parses the PAX headers, advances currEntry and applies them to it.
            paxHeaders();
        } else if (!globalPaxHeaders.isEmpty()) {
            applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
        }
    } catch (final NumberFormatException e) {
        throw new IOException("Error detected parsing the pax header", e);
    }

    if (currEntry.isOldGNUSparse()) {
        readOldGNUSparse();
    }

    return currEntry;
}
524
525
526
527
528
529
530
531
532
533
534
535
536
537 private ByteBuffer getRecord() throws IOException {
538 ByteBuffer headerBuf = readRecord();
539 setAtEOF(isEOFRecord(headerBuf));
540 if (isAtEOF() && headerBuf != null) {
541
542 tryToConsumeSecondEOFRecord();
543 consumeRemainderOfLastBlock();
544 headerBuf = null;
545 }
546 return headerBuf;
547 }
548
549
550
551
552
553
/**
 * Tells whether the end-of-archive flag is set.
 *
 * @return the current value of the end-of-archive flag
 */
protected final boolean isAtEOF() {
    return eof;
}
557
/**
 * Tells whether there is a current entry and it is a directory.
 *
 * @return {@code true} if the current entry is non-null and a directory
 */
private boolean isDirectory() {
    return currEntry != null && currEntry.isDirectory();
}
561
/**
 * Tells whether the given record marks the end of the archive.
 *
 * @param headerBuf the record to check, may be {@code null}
 * @return {@code true} if the record is {@code null} or its first {@code recordSize} bytes are all zero
 */
private boolean isEOFRecord(final ByteBuffer headerBuf) {
    return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
}
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
/**
 * Processes a PAX header entry: parses its key/value pairs from the current entry's data, advances to the entry they describe and
 * applies them to it, then builds the sparse segment streams for that entry.
 *
 * <p>
 * Sparse information can come from three places visible here: headers collected by {@code TarUtils.parsePaxHeaders}, the value stored
 * under {@code TarGnuSparseKeys.MAP}, and, for entries reporting {@code isPaxGNU1XSparse()}, a map read from the start of the entry's
 * own data.
 * </p>
 *
 * @throws IOException if no entry follows the PAX header or if reading or parsing fails
 */
private void paxHeaders() throws IOException {
    List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
    final Map<String, String> headers;
    try (InputStream input = getInputStream(currEntry)) {
        headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
    }

    // A sparse map given as a single header value replaces whatever sparse headers were collected above.
    if (headers.containsKey(TarGnuSparseKeys.MAP)) {
        sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
    }
    // Step to the entry the PAX headers belong to.
    getNextTarEntry();
    if (currEntry == null) {
        throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
    }
    applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

    // For this sparse variant the map is read from the entry's own data stream.
    if (currEntry.isPaxGNU1XSparse()) {
        try (InputStream input = getInputStream(currEntry)) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
        }
        currEntry.setSparseHeaders(sparseHeaders);
        // Move the data offset past one record; presumably the record holding the sparse map — confirm against TarUtils.
        currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
    }

    // Called for every entry that follows a PAX header; the resulting list is stored under the entry's name.
    buildSparseInputStreams();
}
626
/**
 * Parses the current entry's data as global PAX headers, stores the result as the new global headers and advances to the next entry.
 *
 * @throws IOException if reading or parsing fails, or if no entry follows the global PAX header
 */
private void readGlobalPaxHeaders() throws IOException {
    try (InputStream input = getInputStream(currEntry)) {
        globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
    }
    getNextTarEntry();

    // A global PAX header must be followed by at least one more entry.
    if (currEntry == null) {
        throw new IOException("Error detected parsing the pax header");
    }
}
637
638
639
640
641
642
643 private void readOldGNUSparse() throws IOException {
644 if (currEntry.isExtended()) {
645 TarArchiveSparseEntry entry;
646 do {
647 final ByteBuffer headerBuf = getRecord();
648 if (headerBuf == null) {
649 throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
650 }
651 entry = new TarArchiveSparseEntry(headerBuf.array());
652 currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
653 currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
654 } while (entry.isExtended());
655 }
656
657
658
659 buildSparseInputStreams();
660 }
661
662
663
664
665
666
667
668 private ByteBuffer readRecord() throws IOException {
669 recordBuffer.rewind();
670 final int readNow = archive.read(recordBuffer);
671 if (readNow != recordSize) {
672 return null;
673 }
674 return recordBuffer;
675 }
676
/**
 * Moves the channel forward by the given number of bytes relative to its current position.
 *
 * @param offset number of bytes to move forward
 * @throws IOException if the resulting position lies before the current one or repositioning fails
 */
private void repositionForwardBy(final long offset) throws IOException {
    repositionForwardTo(archive.position() + offset);
}
680
681 private void repositionForwardTo(final long newPosition) throws IOException {
682 final long currPosition = archive.position();
683 if (newPosition < currPosition) {
684 throw new IOException("trying to move backwards inside of the archive");
685 }
686 archive.position(newPosition);
687 }
688
689
690
691
692
693
/**
 * Sets the end-of-archive flag.
 *
 * @param eof the new value of the flag
 */
protected final void setAtEOF(final boolean eof) {
    this.eof = eof;
}
697
698
699
700
701
702
703 private void skipRecordPadding() throws IOException {
704 if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
705 final long numRecords = currEntry.getSize() / recordSize + 1;
706 final long padding = numRecords * recordSize - currEntry.getSize();
707 repositionForwardBy(padding);
708 throwExceptionIfPositionIsNotInArchive();
709 }
710 }
711
712
713
714
715
716
/**
 * Verifies that the channel's current position does not lie beyond the end of the archive.
 *
 * @throws IOException if the position is greater than the size of the channel
 */
private void throwExceptionIfPositionIsNotInArchive() throws IOException {
    if (archive.size() < archive.position()) {
        throw new IOException("Truncated TAR archive");
    }
}
722
723
724
725
726
727
728
729
730
731
732
733
/**
 * Tries to consume a second EOF record following the first one.
 *
 * <p>
 * The next record is read. If it is not an EOF record, the channel is moved back by one record so that the record can be read again
 * later. If no complete record could be read, that counts as an EOF record and the channel is left where the read stopped.
 * </p>
 *
 * @throws IOException if reading or repositioning fails
 */
private void tryToConsumeSecondEOFRecord() throws IOException {
    // Starts out true so that the position is also moved back when readRecord() throws.
    boolean shouldReset = true;
    try {
        shouldReset = !isEOFRecord(readRecord());
    } finally {
        if (shouldReset) {
            archive.position(archive.position() - recordSize);
        }
    }
}
744 }