/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.input.BoundedInputStream;

/**
 * The TarArchiveInputStream reads a Unix tar archive as an InputStream. Methods are provided to position at each successive entry in the archive, and then
 * read each entry as a normal input stream using read().
 *
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> {

    /**
     * IBM AIX <a href="https://www.ibm.com/docs/sv/aix/7.2.0?topic=files-tarh-file">tar.h</a>: "This field is terminated with a space only."
     */
    private static final String VERSION_AIX = "0 ";

    private static final int SMALL_BUFFER_SIZE = 256;

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature the bytes to check.
     * @param length    the number of bytes to check.
     * @return true if this stream is a tar archive stream, false otherwise.
     */
    public static boolean matches(final byte[] signature, final int length) {
        final int versionOffset = TarConstants.VERSION_OFFSET;
        final int versionLen = TarConstants.VERSIONLEN;
        if (length < versionOffset + versionLen) {
            return false;
        }
        final int magicOffset = TarConstants.MAGIC_OFFSET;
        final int magicLen = TarConstants.MAGICLEN;
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
                && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, signature, versionOffset, versionLen)) {
            return true;
        }
        // IBM AIX tar.h https://www.ibm.com/docs/sv/aix/7.2.0?topic=files-tarh-file : "This field is terminated with a space only."
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, signature, magicOffset, magicLen)
                && ArchiveUtils.matchAsciiBuffer(VERSION_AIX, signature, versionOffset, versionLen)) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, signature, magicOffset, magicLen)
                && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, signature, versionOffset, versionLen)
                        || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, signature, versionOffset, versionLen))) {
            return true;
        }
        // COMPRESS-107 - recognize Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT, signature, magicOffset, magicLen)
                && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT, signature, versionOffset, versionLen);
    }
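
    // Illustrative sketch (not part of this class): probing a mark-supporting stream for the tar
    // format before wrapping it. "rawIn" is a hypothetical caller-supplied stream; IOUtils.readFully
    // and TarConstants.DEFAULT_RCDSIZE are the same helpers this class uses itself.
    //
    //   final InputStream in = new java.io.BufferedInputStream(rawIn);
    //   final byte[] signature = new byte[TarConstants.DEFAULT_RCDSIZE];
    //   in.mark(signature.length);
    //   final int signatureLength = IOUtils.readFully(in, signature);
    //   in.reset();
    //   if (TarArchiveInputStream.matches(signature, signatureLength)) {
    //       // safe to treat "in" as a tar archive
    //   }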

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The buffer to store the TAR header. **/
    private final byte[] recordBuffer;

    /** The size of a block. */
    private final int blockSize;

    /** True if stream is at EOF. */
    private boolean atEof;

    /** Size of the current entry. */
    private long entrySize;

    /** How far into the entry the stream is. */
    private long entryOffset;

    /** Input streams for reading sparse entries. **/
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries. */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry. */
    private TarArchiveEntry currEntry;

    /** The encoding of the file. */
    private final ZipEncoding zipEncoding;

    /** The global PAX header. */
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    /** The global sparse headers; only used in PAX Format 0.X. */
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     */
    public TarArchiveInputStream(final InputStream inputStream) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream inputStream, final boolean lenient) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }
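
    // Illustrative sketch (not part of this class): opening an archive leniently so that malformed
    // numeric header fields surface as TarArchiveEntry.UNKNOWN instead of an exception. The file
    // name is hypothetical.
    //
    //   try (TarArchiveInputStream tarIn = new TarArchiveInputStream(
    //           java.nio.file.Files.newInputStream(java.nio.file.Paths.get("archive.tar")), true)) {
    //       // read entries here
    //   }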

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize) {
        this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize) {
        this(inputStream, blockSize, recordSize, null);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding) {
        this(inputStream, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param recordSize  the record size to use
     * @param encoding    name of the encoding to use for file names
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final int recordSize, final String encoding, final boolean lenient) {
        super(inputStream, encoding);
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param blockSize   the block size to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final int blockSize, final String encoding) {
        this(inputStream, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructs a new instance.
     *
     * @param inputStream the input stream to use
     * @param encoding    name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream inputStream, final String encoding) {
        this(inputStream, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only
     * in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException if an I/O error occurs; declared to satisfy the {@link InputStream#available()} signature.
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }
        final long available = currEntry.getRealSize() - entryOffset;
        if (available > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) available;
    }
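
    // Illustrative sketch (not part of this class): available() reports what is left in the
    // *current entry*, not the archive, so it can size a buffer for the rest of a small entry
    // (values above Integer.MAX_VALUE are clamped). "tarIn" is hypothetical.
    //
    //   final byte[] rest = new byte[tarIn.available()];
    //   IOUtils.readFully(tarIn, rest);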

    /**
     * Builds the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream is specified by the sparse headers.
     * <p>
     * NOTE: Some all-zero input streams and non-zero input streams have a size of 0. We DO NOT store the 0-size input streams because they are meaningless.
     * </p>
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }
            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                // @formatter:off
                sparseInputStreams.add(BoundedInputStream.builder()
                        .setInputStream(zeroInputStream)
                        .setMaxCount(sparseHeader.getOffset() - offset)
                        .get());
                // @formatter:on
            }
            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                // @formatter:off
                sparseInputStreams.add(BoundedInputStream.builder()
                        .setInputStream(in)
                        .setMaxCount(sparseHeader.getNumbytes())
                        .get());
                // @formatter:on
            }
            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }
        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }
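
    // Worked example (illustrative): for ordered sparse headers
    // (offset=4096, numbytes=512) and (offset=8192, numbytes=256), the loop above produces
    // [zero(4096), data(512), zero(3584), data(256)]; the second zero stream covers
    // 8192 - (4096 + 512) = 3584 bytes of hole between the two data blocks.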

    /**
     * Tests whether this class is able to read the given entry.
     *
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
        return archiveEntry instanceof TarArchiveEntry;
    }

    /**
     * Closes this stream, closing any input streams built for sparse entries and then the underlying input stream.
     *
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }
        in.close();
    }

    /**
     * This method is invoked once the end of the archive is hit; it tries to consume the remaining bytes under the assumption that the tool creating this
     * archive has padded the last block.
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            count(IOUtils.skip(in, blockSize - bytesReadOfLastBlock));
        }
    }

    /**
     * For a FileInputStream, skip() always returns the requested number of bytes, so we need the available byte count to determine how many bytes were
     * actually skipped.
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped   skipped bytes returned by inputStream.skip()
     * @param expected  bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (in instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }
        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }
        return actuallySkipped;
    }

    /**
     * Gets the current TAR archive entry that this input stream is processing.
     *
     * @return The current archive entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    /**
     * Gets the next entry in this tar archive as long name data.
     *
     * @return The next entry in the archive as long name data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Gets the next TarArchiveEntry in this stream.
     *
     * @return the next entry, or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public TarArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Gets the next entry in this tar archive. This will skip over any remaining data in the current entry, if there is one, place the input stream at the
     * header of the next entry, read the header, instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in the
     * archive, null will be returned to indicate that the end of the archive has been reached.
     *
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }
        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);
            /* skip to the end of the last record */
            skipRecordPadding();
        }
        final byte[] headerBuf = getRecord();
        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }
        try {
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }
        entryOffset = 0;
        entrySize = currEntry.getSize();
        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }
        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by entry
                return null;
            }
            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }
        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }
        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }
        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }
        // If the size of the next element in the archive has changed
        // due to a new size being reported in the POSIX header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();
        return currEntry;
    }
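
    // Illustrative sketch (not part of this class): the canonical entry iteration loop. "tarIn"
    // and the per-entry handling are hypothetical placeholders.
    //
    //   TarArchiveEntry entry;
    //   while ((entry = tarIn.getNextEntry()) != null) {
    //       if (!tarIn.canReadEntryData(entry) || entry.isDirectory()) {
    //           continue; // entry data, if any, is skipped by the next getNextEntry() call
    //       }
    //       // use entry.getName(), entry.getSize(), and read the entry data here
    //   }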

    /**
     * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at
     * the header of the next entry.
     * <p>
     * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
     * {@code hasHitEOF} marker will be set to true.
     * </p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Gets the record size being used by this stream's buffer.
     *
     * @return The record size.
     */
    public int getRecordSize() {
        return recordBuffer.length;
    }

    /**
     * Tests whether we are at the end-of-file.
     *
     * @return whether we are at the end-of-file.
     */
    protected final boolean isAtEOF() {
        return atEof;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Tests if an archive record indicates End of Archive. End of archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, getRecordSize());
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return false.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes) may appear multiple times, and they look like:
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     * </p>
     * <p>
     * GNU.sparse.map: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </p>
     * <p>
     * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
     * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following
     * are map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
     * </p>
     *
     * @throws IOException if an I/O error occurs.
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);
        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(in, getRecordSize());
            currEntry.setSparseHeaders(sparseHeaders);
        }
        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }
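
    // Worked example (illustrative): a PAX 0.1 header of
    // GNU.sparse.map=0,1024,10240,512 describes two data chunks, one of 1024 bytes at offset 0 and
    // one of 512 bytes at offset 10240; everything between them is a hole of zeros.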

    /**
     * Reads bytes from the current tar archive entry.
     * <p>
     * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
     * </p>
     *
     * @param buf       The buffer into which to place bytes read.
     * @param offset    The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;
        if (isAtEOF() || isDirectory()) {
            return -1;
        }
        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }
        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }
        numToRead = Math.min(numToRead, available());
        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = in.read(buf, offset, numToRead);
        }
        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }
        return totalRead;
    }
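
    // Illustrative sketch (not part of this class): draining the current entry with the usual
    // read loop; "tarIn" and "out" are hypothetical.
    //
    //   final byte[] buffer = new byte[8192];
    //   int n;
    //   while ((n = tarIn.read(buffer, 0, buffer.length)) != -1) {
    //       out.write(buffer, 0, n);
    //   }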

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }
        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a record from the input stream and returns the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readCount = IOUtils.readFully(in, recordBuffer);
        count(readCount);
        if (readCount != getRecordSize()) {
            return null;
        }
        return recordBuffer;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all 0) in the file. Only the non-zero data is stored in tar files, and it is stored
     * separately. The structure of the non-zero data is described by the sparse headers: the offset, where a block of non-zero data starts, and numbytes, the
     * length of the non-zero data block. When reading sparse entries, the actual data is read out with "holes" and non-zero data combined together according
     * to the sparse headers.
     *
     * @param buf       The buffer into which to place bytes read.
     * @param offset    The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return in.read(buf, offset, numToRead);
        }
        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }
        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);
        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }
        // if EOF of the current input stream is reached, open a new input stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }
        // if the remaining data of the current input stream is not long enough, open a new input stream
        // and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }
            return readLen + readLenOfNext;
        }
        // if the remaining data of the current input stream is enough (which means readLen == numToRead), just return readLen
        return readLen;
    }
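
    // Worked example (illustrative): with sparse streams [zero(4096), data(512)] and 4608 bytes
    // left in the entry, a read(buf, 0, 8192) is first clamped to 4608 via available(), drains the
    // 4096 zero bytes, then recurses into the data stream and returns 4096 + 512 = 4608.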

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
        // empty
    }

    /**
     * Sets whether we are at the end-of-file.
     *
     * @param atEof whether we are at the end-of-file.
     */
    protected final void setAtEOF(final boolean atEof) {
        this.atEof = atEof;
    }

    /**
     * Sets the current entry.
     *
     * @param currEntry the current entry.
     */
    protected final void setCurrentEntry(final TarArchiveEntry currEntry) {
        this.currEntry = currEntry;
    }
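
    // Illustrative sketch (not part of this class): skipping the remainder of the current entry,
    // as getNextTarEntry() does internally, using the entry-bounded skip() below. "tarIn" is
    // hypothetical.
    //
    //   long skipped;
    //   do {
    //       skipped = tarIn.skip(Long.MAX_VALUE);
    //   } while (skipped > 0);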

    /**
     * Skips over and discards {@code n} bytes of data from this input stream. The {@code skip} method may, for a variety of reasons, end up skipping over
     * some smaller number of bytes, possibly {@code 0}; reaching end of file or end of entry before {@code n} bytes have been skipped are only two
     * possibilities. The actual number of bytes skipped is returned. If {@code n} is negative, no bytes are skipped.
     *
     * @param n the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }
        final long availableOfInputStream = in.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;
        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(in, numToSkip);
            // for a non-sparse entry, we determine the bytes actually skipped with the help of
            // inputStream.available() if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }
        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry.
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
            final long available = in.available();
            final long numRecords = this.entrySize / getRecordSize() + 1;
            final long padding = numRecords * getRecordSize() - this.entrySize;
            long skipped = IOUtils.skip(in, padding);
            skipped = getActuallySkipped(available, skipped, padding);
            count(skipped);
        }
    }

    /**
     * Skips n bytes from the current input stream; if the current input stream doesn't have enough data to skip, jumps to the next input stream and skips the
     * remaining bytes, and keeps doing this until n bytes in total are skipped or all the input streams are exhausted.
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs.
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return in.skip(n);
        }
        long bytesSkipped = 0;
        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);
            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }
        return bytesSkipped;
    }

    /**
     * Tries to read the next record rewinding the stream if it is not an EOF record.
     * <p>
     * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since
     * a non-conforming implementation likely won't fill full blocks consisting of - by default - twenty records either, so we probably have already read
     * beyond the archive anyway.
     * </p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = in.markSupported();
        if (marked) {
            in.mark(getRecordSize());
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(getRecordSize());
                in.reset();
            }
        }
    }
}