001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.ByteArrayOutputStream; 022import java.io.Closeable; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.nio.ByteBuffer; 027import java.nio.channels.SeekableByteChannel; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.HashMap; 033import java.util.LinkedList; 034import java.util.List; 035import java.util.Map; 036 037import org.apache.commons.compress.archivers.zip.ZipEncoding; 038import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 039import org.apache.commons.compress.utils.ArchiveUtils; 040import org.apache.commons.compress.utils.BoundedArchiveInputStream; 041import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 042import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; 043import org.apache.commons.io.input.BoundedInputStream; 044 045/** 046 * Provides random access to Unix archives. 
 *
 * @since 1.21
 */
public class TarFile implements Closeable {

    /**
     * An {@link InputStream} over a single entry's data, bounded by the entry's data offset and real size.
     * For sparse entries, reads are delegated to the pre-built list of zero-filled and archive-backed
     * streams in {@code sparseInputStreams}; for normal entries, data is read directly from the channel.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        /** The channel of the enclosing TarFile; repositioned on every read. */
        private final SeekableByteChannel channel;

        /** The entry whose data this stream exposes. */
        private final TarArchiveEntry entry;

        /** Logical offset into the (extracted) entry data read so far. */
        private long entryOffset;

        /** Index into the entry's sparse input stream list (sparse entries only). */
        private int currentSparseInputStreamIndex;

        /**
         * Constructs a stream bounded to {@code [entry.getDataOffset(), entry.getDataOffset() + entry.getRealSize())}.
         *
         * @param entry   the entry to read
         * @param channel the archive channel backing the entry
         * @throws IOException if the entry claims more data than the archive contains
         */
        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // Reject entries whose declared size extends past the end of the archive.
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        /**
         * Reads into {@code buf} starting at archive position {@code pos}; invoked by the base class.
         *
         * @return the number of bytes read, or -1 at end of the entry
         * @throws IOException if the archive is truncated mid-entry
         */
        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            // All of the entry's logical data has been delivered.
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // EOF before the declared size was reached: data is missing unless the buffer is empty.
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                // Prepare the buffer for the caller to drain what was just written.
                buf.flip();
            }
            return totalRead;
        }

        /** Reads raw bytes from the archive channel at the given absolute position. */
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }

        /**
         * Reads up to {@code numToRead} bytes of a sparse entry at logical offset {@code pos}, stitching
         * together the entry's zero-filled and data-backed streams; recurses across stream boundaries.
         */
        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            // All sparse streams consumed: end of entry.
            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from the current input stream (possibly -1 = EOF)
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of the current input stream is met, advance to the next stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the remaining data of the current input stream is not long enough, advance to the next
            // stream and recursively read the remainder
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // the current input stream satisfied the whole request (readLen == numToRead)
            return readLen;
        }
    }

    /** Size of the scratch buffer used when draining long-name/long-link meta entries. */
    private static final int SMALL_BUFFER_SIZE = 256;

    // Reusable scratch buffer for getLongNameData(); TarFile is not thread-safe.
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The channel the archive is read from; closed by {@link #close()}. */
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    /** All entries discovered while scanning the archive in the constructor. */
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    /** Block size used to consume padding at the end of the archive. */
    private final int blockSize;

    /** Whether illegal header values are tolerated instead of causing exceptions. */
    private final boolean lenient;

    /** Record size; headers and record padding are aligned to this. */
    private final int recordSize;

    // Reused for every readRecord() call; contents are only valid until the next read.
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    /** True once the end-of-archive marker has been seen. */
    private boolean eof;

    /**
     * The meta-data about the
current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header, replaced whenever a global PAX header entry is read
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // entry name -> list of input streams making up the sparse entry's logical content
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content) throws IOException {
        this(new SeekableInMemoryByteChannel(content));
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final boolean lenient) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content  the content to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final String encoding) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive) throws IOException {
        this(archive.toPath());
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final boolean lenient) throws IOException {
        this(archive.toPath(), lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive  the file of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final String encoding) throws IOException {
        this(archive.toPath(), encoding);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param encoding    the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final String encoding) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel content) throws IOException {
        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive    the seekable byte channel to use
     * @param blockSize  the blocks size to use
     * @param recordSize the record size to use
     * @param encoding   the encoding to use
     * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                   {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
            throws IOException {
        this.archive = archive;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
        this.blockSize = blockSize;
        this.lenient = lenient;

        // Eagerly scan the whole archive so getEntries() can answer from memory
        // and getInputStream() can seek to recorded data offsets.
        TarArchiveEntry entry;
        while ((entry = getNextTarEntry()) != null) {
            entries.add(entry);
        }
    }

    /**
     * Update the current entry with the read pax headers
     *
     * @param headers       Headers read from the pax header
     * @param sparseHeaders Sparse headers read from pax header
     * @throws IOException if an invalid header value is encountered
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream is introduced by the sparse headers.
     *
     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 0 size input streams because they are
     *           meaningless.
     */
    private void buildSparseInputStreams() throws IOException {
        final List<InputStream> streams = new ArrayList<>();
        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        long numberOfZeroBytesInSparseEntry = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            // Gap between the previous chunk's end and this chunk's start is a run of zeros.
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }
            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                streams.add(BoundedInputStream.builder().setInputStream(zeroInputStream).setMaxCount(zeroBlockSize).get());
                numberOfZeroBytesInSparseEntry += zeroBlockSize;
            }
            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                // Zero runs occupy no space in the archive, so subtract them to get the physical offset.
                final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
                if (start + sparseHeader.getNumbytes() < start) {
                    // possible integer overflow
                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
                }
                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
            }
            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }
        sparseInputStreams.put(currEntry.getName(), streams);
    }

    /**
     * Closes the underlying archive channel.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }

    /**
     * This method is invoked once the end of the archive is hit, it tries to consume the remaining bytes under the assumption that the tool creating this
     * archive has padded the last block.
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = archive.position() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            repositionForwardBy(blockSize - bytesReadOfLastBlock);
        }
    }

    /**
     * Gets all TAR Archive Entries from the TarFile
     *
     * @return All entries from the tar file (a defensive copy; modifying it does not affect this TarFile)
     */
    public List<TarArchiveEntry> getEntries() {
        return new ArrayList<>(entries);
    }

    /**
     * Gets the input stream for the provided Tar Archive Entry.
     *
     * @param entry Entry to get the input stream from
     * @return Input stream of the provided entry
     * @throws IOException Corrupted TAR archive. Can't read entry.
     */
    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
        try {
            return new BoundedTarEntryInputStream(entry, archive);
        } catch (final RuntimeException ex) {
            // Surface malformed offsets/sizes as a checked, caller-friendly exception.
            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
        }
    }

    /**
     * Gets the next entry in this tar archive as long name data.
     *
     * @return The next entry in the archive as long name data, or null.
     * @throws IOException on error
     */
    private byte[] getLongNameData() throws IOException {
        // Drain the meta entry's data: it contains the actual (long) name bytes.
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length;
        try (InputStream in = getInputStream(currEntry)) {
            while ((length = in.read(smallBuf)) >= 0) {
                longName.write(smallBuf, 0, length);
            }
        }
        // Advance to the real entry the long name belongs to.
        getNextTarEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            longNameData = Arrays.copyOf(longNameData, length);
        }
        return longNameData;
    }

    /**
     * Gets the next entry in this tar archive. This will skip to the end of the current entry, if there is one, and place the position of the channel at the
     * header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
     * the archive, null will be returned to indicate that the end of the archive has been reached.
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            // Skip to the end of the entry
            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
            throwExceptionIfPositionIsNotInArchive();
            skipRecordPadding();
        }

        final ByteBuffer headerBuf = getRecord();
        if (null == headerBuf) {
            // Hit EOF
            currEntry = null;
            return null;
        }

        try {
            // Data of the entry starts right after its header record.
            final long position = archive.position();
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        // GNU 'K' meta entry: the data holds the link name for the entry that follows.
        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        // GNU 'L' meta entry: the data holds the name for the entry that follows.
        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        return currEntry;
    }

    /**
     * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
     * header of the next entry.
     *
     * <p>
     * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
     * {@code hasHitEOF} marker will be set to true.
     * </p>
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private ByteBuffer getRecord() throws IOException {
        ByteBuffer headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            // Consume rest: the spec ends an archive with two EOF records plus block padding.
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Tests whether or not we are at the end-of-file.
     *
     * @return whether or not we are at the end-of-file.
     */
    protected final boolean isAtEOF() {
        return eof;
    }

    /** Tests whether the current entry exists and is a directory. */
    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /** Tests whether the given record is missing or consists entirely of NUL bytes (an EOF record). */
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }

    /**
     * <p>
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * <p>
     * For PAX Format 1.X: <br>
     * The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers delimited by newlines.
     * The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are map entries, each one
     * consisting of two numbers giving the offset and size of the data block it describes.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers;
        try (InputStream input = getInputStream(currEntry)) {
            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
        }

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextTarEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            try (InputStream input = getInputStream(currEntry)) {
                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
            }
            currEntry.setSparseHeaders(sparseHeaders);
            // data of the entry is after the pax gnu entry. So we need to update the data position once again
            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a global PAX header entry and replaces the stored global headers, then advances to the
     * entry that follows it.
     *
     * @throws IOException if the header cannot be parsed or no entry follows it
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException when reading the sparse entry fails
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            // Old GNU format chains extra header records while the "extended" flag is set.
            TarArchiveSparseEntry entry;
            do {
                final ByteBuffer headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf.array());
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
                // each extension record pushes the entry's data one record further out
                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException if reading from the archive fails
     */
    private ByteBuffer readRecord() throws IOException {
        // NOTE: the returned buffer is the shared recordBuffer; it is overwritten by the next call.
        recordBuffer.rewind();
        final int readNow = archive.read(recordBuffer);
        if (readNow != recordSize) {
            return null;
        }
        return recordBuffer;
    }

    /** Moves the archive position forward by {@code offset} bytes. */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }

    /** Moves the archive position forward to {@code newPosition}; rejects backward moves. */
    private void repositionForwardTo(final long newPosition) throws IOException {
        final long currPosition = archive.position();
        if (newPosition < currPosition) {
            throw new IOException("trying to move backwards inside of the archive");
        }
        archive.position(newPosition);
    }

    /**
     * Sets whether we are at end-of-file.
     *
     * @param eof whether we are at end-of-file.
     */
    protected final void setAtEOF(final boolean eof) {
        this.eof = eof;
    }

    /**
     * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry
     *
     * @throws IOException when skipping the padding of the record fails
     */
    private void skipRecordPadding() throws IOException {
        // Directories and record-aligned entries have no padding to skip.
        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
            final long numRecords = currEntry.getSize() / recordSize + 1;
            final long padding = numRecords * recordSize - currEntry.getSize();
            repositionForwardBy(padding);
            throwExceptionIfPositionIsNotInArchive();
        }
    }

    /**
     * Checks if the current position of the SeekableByteChannel is in the archive.
     *
     * @throws IOException If the position is not in the archive
     */
    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
        if (archive.size() < archive.position()) {
            throw new IOException("Truncated TAR archive");
        }
    }

    /**
     * Tries to read the next record resetting the position in the archive if it is not an EOF record.
     *
     * <p>
     * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since a
     * non-conforming implementation likely won't fill full blocks consisting of - by default - ten records either so we probably have already read beyond the
     * archive anyway.
     * </p>
     *
     * @throws IOException if reading the record of resetting the position in the archive fails
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            // Rewind even when readRecord() threw, so the position stays consistent.
            if (shouldReset) {
                archive.position(archive.position() - recordSize);
            }
        }
    }
}