// TarFile.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.archivers.tar;

  18. import java.io.ByteArrayOutputStream;
  19. import java.io.Closeable;
  20. import java.io.File;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.nio.ByteBuffer;
  24. import java.nio.channels.SeekableByteChannel;
  25. import java.nio.file.Files;
  26. import java.nio.file.Path;
  27. import java.util.ArrayList;
  28. import java.util.Arrays;
  29. import java.util.HashMap;
  30. import java.util.LinkedList;
  31. import java.util.List;
  32. import java.util.Map;

  33. import org.apache.commons.compress.archivers.zip.ZipEncoding;
  34. import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
  35. import org.apache.commons.compress.utils.ArchiveUtils;
  36. import org.apache.commons.compress.utils.BoundedArchiveInputStream;
  37. import org.apache.commons.compress.utils.BoundedInputStream;
  38. import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
  39. import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;

  40. /**
  41.  * Provides random access to UNIX archives.
  42.  *
  43.  * @since 1.21
  44.  */
  45. public class TarFile implements Closeable {

    /**
     * Random-access view of a single entry's data. The stream is bounded to {@code entry.getRealSize()} bytes
     * starting at the entry's data offset and reads either directly from the archive channel (plain entries)
     * or from the pre-built list of sparse input streams (sparse entries).
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        /** Channel the archive is read from. */
        private final SeekableByteChannel channel;

        /** The entry whose data this stream exposes. */
        private final TarArchiveEntry entry;

        /** Number of logical entry bytes already handed to the caller. */
        private long entryOffset;

        /** Index into the entry's sparse stream list; only advances for sparse entries. */
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // Reject entries whose declared data region would extend past the end of the archive.
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            // All logical bytes of the entry have been delivered already.
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // EOF while the caller still expected data: the archive is cut short.
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                // Prepare the buffer so the caller can read what was just written into it.
                buf.flip();
            }
            return totalRead;
        }

        /** Reads from the archive channel at the given absolute channel position. */
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }

        /**
         * Reads up to {@code numToRead} bytes of a sparse entry starting at logical offset {@code pos},
         * recursing across the entry's chain of sparse input streams when one is exhausted.
         */
        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            // all sparse streams have been consumed -> EOF
            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is meet, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
            return readLen;
        }
    }

    /** Size of the reusable scratch buffer used when draining entry contents (e.g. long names). */
    private static final int SMALL_BUFFER_SIZE = 256;

    /** Scratch buffer shared by the long-name reading code. */
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** Channel the archive is read from; closed by {@link #close()}. */
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    /** All entries discovered while scanning the archive in the constructor. */
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    /** Block size used when consuming the padding after the final EOF record. */
    private final int blockSize;

    /** Whether illegal header values are tolerated instead of raising an exception. */
    private final boolean lenient;

    /** Size of a single tar record in bytes. */
    private final int recordSize;

    /** Reusable buffer holding exactly one record; refilled by readRecord(). */
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    /** True once the EOF record (or a short read) has been seen. */
    private boolean hasHitEOF;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header; intentionally non-final, reassigned whenever a global PAX header entry is parsed
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    /** Sparse input streams keyed by entry name, built while scanning sparse entries. */
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding,
     * and is not lenient about illegal header values.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content) throws IOException {
        this(new SeekableInMemoryByteChannel(content));
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding.
     *
     * @param content the content to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final boolean lenient) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile. Uses the default block and record sizes and is not lenient.
     *
     * @param content  the content to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final String encoding) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding,
     * and is not lenient about illegal header values.
     *
     * @param archive the file of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive) throws IOException {
        this(archive.toPath());
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding.
     *
     * @param archive the file of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final boolean lenient) throws IOException {
        this(archive.toPath(), lenient);
    }

    /**
     * Constructor for TarFile. Uses the default block and record sizes and is not lenient.
     *
     * @param archive  the file of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final String encoding) throws IOException {
        this(archive.toPath(), encoding);
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding,
     * and is not lenient about illegal header values.
     *
     * @param archivePath the path of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding.
     *
     * @param archivePath the path of the archive to use
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile. Uses the default block and record sizes and is not lenient.
     *
     * @param archivePath the path of the archive to use
     * @param encoding    the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final String encoding) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile. Uses the default block size, record size and platform encoding,
     * and is not lenient about illegal header values.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel content) throws IOException {
        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

  245.     /**
  246.      * Constructor for TarFile.
  247.      *
  248.      * @param archive    the seekable byte channel to use
  249.      * @param blockSize  the blocks size to use
  250.      * @param recordSize the record size to use
  251.      * @param encoding   the encoding to use
  252.      * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
  253.      *                   {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
  254.      * @throws IOException when reading the tar archive fails
  255.      */
  256.     public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
  257.             throws IOException {
  258.         this.archive = archive;
  259.         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
  260.         this.recordSize = recordSize;
  261.         this.recordBuffer = ByteBuffer.allocate(this.recordSize);
  262.         this.blockSize = blockSize;
  263.         this.lenient = lenient;

  264.         TarArchiveEntry entry;
  265.         while ((entry = getNextTarEntry()) != null) {
  266.             entries.add(entry);
  267.         }
  268.     }

    /**
     * Update the current entry with the read pax headers
     *
     * @param headers       Headers read from the pax header
     * @param sparseHeaders Sparse headers read from pax header
     * @throws IOException if applying the headers to the current entry fails
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream is introduced by the sparse headers.
     *
     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 0 size input streams because they are
     *           meaningless.
     */
    private void buildSparseInputStreams() throws IOException {
        final List<InputStream> streams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        // tracks how many logical zero bytes precede the current data block; needed to map
        // logical entry offsets back to physical positions in the archive
        long numberOfZeroBytesInSparseEntry = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            // gap between the end of the previous block and the start of this one
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
                numberOfZeroBytesInSparseEntry += zeroBlockSize;
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                // physical start = data offset + logical offset minus the zeros that are not stored
                final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
                if (start + sparseHeader.getNumbytes() < start) {
                    // possible integer overflow
                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
                }
                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        sparseInputStreams.put(currEntry.getName(), streams);
    }

    /**
     * Closes the underlying archive channel. Input streams previously obtained from
     * {@link #getInputStream} read from that channel and become unusable afterwards.
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }

  322.     /**
  323.      * This method is invoked once the end of the archive is hit, it tries to consume the remaining bytes under the assumption that the tool creating this
  324.      * archive has padded the last block.
  325.      */
  326.     private void consumeRemainderOfLastBlock() throws IOException {
  327.         final long bytesReadOfLastBlock = archive.position() % blockSize;
  328.         if (bytesReadOfLastBlock > 0) {
  329.             repositionForwardBy(blockSize - bytesReadOfLastBlock);
  330.         }
  331.     }

    /**
     * Gets all TAR Archive Entries from the TarFile
     *
     * @return All entries from the tar file, as a fresh mutable copy; callers may modify the list freely
     */
    public List<TarArchiveEntry> getEntries() {
        // defensive copy so callers cannot mutate the internal entry list
        return new ArrayList<>(entries);
    }

    /**
     * Gets the input stream for the provided Tar Archive Entry.
     *
     * @param entry Entry to get the input stream from
     * @return Input stream of the provided entry
     * @throws IOException Corrupted TAR archive. Can't read entry.
     */
    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
        try {
            return new BoundedTarEntryInputStream(entry, archive);
        } catch (final RuntimeException ex) {
            // translate unchecked failures from corrupted offsets/sizes into the documented IOException
            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
        }
    }

  354.     /**
  355.      * Gets the next entry in this tar archive as long name data.
  356.      *
  357.      * @return The next entry in the archive as long name data, or null.
  358.      * @throws IOException on error
  359.      */
  360.     private byte[] getLongNameData() throws IOException {
  361.         final ByteArrayOutputStream longName = new ByteArrayOutputStream();
  362.         int length;
  363.         try (InputStream in = getInputStream(currEntry)) {
  364.             while ((length = in.read(smallBuf)) >= 0) {
  365.                 longName.write(smallBuf, 0, length);
  366.             }
  367.         }
  368.         getNextTarEntry();
  369.         if (currEntry == null) {
  370.             // Bugzilla: 40334
  371.             // Malformed tar file - long entry name not followed by entry
  372.             return null;
  373.         }
  374.         byte[] longNameData = longName.toByteArray();
  375.         // remove trailing null terminator(s)
  376.         length = longNameData.length;
  377.         while (length > 0 && longNameData[length - 1] == 0) {
  378.             --length;
  379.         }
  380.         if (length != longNameData.length) {
  381.             longNameData = Arrays.copyOf(longNameData, length);
  382.         }
  383.         return longNameData;
  384.     }

    /**
     * Gets the next entry in this tar archive. This will skip to the end of the current entry, if there is one, and place the position of the channel at the
     * header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
     * the archive, null will be returned to indicate that the end of the archive has been reached.
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            // Skip to the end of the entry
            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
            throwExceptionIfPositionIsNotInArchive();
            skipRecordPadding();
        }

        final ByteBuffer headerBuf = getRecord();
        if (null == headerBuf) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            // position is now just past the header record, i.e. the entry's data offset
            final long position = archive.position();
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        // GNU "long link" pseudo entry: the real link name is stored as the entry's data.
        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        // GNU "long name" pseudo entry: the real entry name is stored as the entry's data.
        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                // No local PAX header: still apply any global defaults seen earlier.
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        return currEntry;
    }

  457.     /**
  458.      * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
  459.      * header of the next entry.
  460.      *
  461.      * <p>
  462.      * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
  463.      * {@code hasHitEOF} marker will be set to true.
  464.      * </p>
  465.      *
  466.      * @return The next TarEntry in the archive, or null if there is no next entry.
  467.      * @throws IOException when reading the next TarEntry fails
  468.      */
  469.     private ByteBuffer getRecord() throws IOException {
  470.         ByteBuffer headerBuf = readRecord();
  471.         setAtEOF(isEOFRecord(headerBuf));
  472.         if (isAtEOF() && headerBuf != null) {
  473.             // Consume rest
  474.             tryToConsumeSecondEOFRecord();
  475.             consumeRemainderOfLastBlock();
  476.             headerBuf = null;
  477.         }
  478.         return headerBuf;
  479.     }

    /** @return whether the end of the archive has been reached. */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    /** @return whether there is a current entry and it is a directory. */
    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /** @return whether the record is missing or consists entirely of NUL bytes (a tar EOF record). */
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }

    /**
     * <p>
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * <p>
     * For PAX Format 1.X: <br>
     * The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers delimited by newlines.
     * The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are map entries, each one
     * consisting of two numbers giving the offset and size of the data block it describes.
     *
     * @throws IOException if parsing the PAX headers fails or the header entry is not followed by a real entry
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers;
        // the PAX header entry's data holds the key=value records (covers 0.0 format sparse keys too)
        try (InputStream input = getInputStream(currEntry)) {
            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
        }

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextTarEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            try (InputStream input = getInputStream(currEntry)) {
                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
            }
            currEntry.setSparseHeaders(sparseHeaders);
            // data of the entry is after the pax gnu entry. So we need to update the data position once again
            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a global PAX header entry, replacing {@code globalPaxHeaders} with the parsed values,
     * then advances to the real entry that follows it.
     *
     * @throws IOException if parsing fails or the global header is not followed by an entry
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException when reading the sparse entry fails
     */
    private void readOldGNUSparse() throws IOException {
        // old GNU sparse entries may chain extension header records, each flagged "extended"
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final ByteBuffer headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf.array());
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
                // each extension header record pushes the entry data one record further back
                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

  577.     /**
  578.      * Read a record from the input stream and return the data.
  579.      *
  580.      * @return The record data or null if EOF has been hit.
  581.      * @throws IOException if reading from the archive fails
  582.      */
  583.     private ByteBuffer readRecord() throws IOException {
  584.         recordBuffer.rewind();
  585.         final int readNow = archive.read(recordBuffer);
  586.         if (readNow != recordSize) {
  587.             return null;
  588.         }
  589.         return recordBuffer;
  590.     }

    /** Moves the channel position forward by {@code offset} bytes; never moves backwards. */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }

  594.     private void repositionForwardTo(final long newPosition) throws IOException {
  595.         final long currPosition = archive.position();
  596.         if (newPosition < currPosition) {
  597.             throw new IOException("trying to move backwards inside of the archive");
  598.         }
  599.         archive.position(newPosition);
  600.     }

    /**
     * Sets whether the end of the archive has been reached.
     *
     * @param b true once the EOF record has been seen
     */
    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

  604.     /**
  605.      * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry
  606.      *
  607.      * @throws IOException when skipping the padding of the record fails
  608.      */
  609.     private void skipRecordPadding() throws IOException {
  610.         if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
  611.             final long numRecords = currEntry.getSize() / recordSize + 1;
  612.             final long padding = numRecords * recordSize - currEntry.getSize();
  613.             repositionForwardBy(padding);
  614.             throwExceptionIfPositionIsNotInArchive();
  615.         }
  616.     }

  617.     /**
  618.      * Checks if the current position of the SeekableByteChannel is in the archive.
  619.      *
  620.      * @throws IOException If the position is not in the archive
  621.      */
  622.     private void throwExceptionIfPositionIsNotInArchive() throws IOException {
  623.         if (archive.size() < archive.position()) {
  624.             throw new IOException("Truncated TAR archive");
  625.         }
  626.     }

    /**
     * Tries to read the next record resetting the position in the archive if it is not an EOF record.
     *
     * <p>
     * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since a
     * non-conforming implementation likely won't fill full blocks consisting of - by default - ten records either so we probably have already read beyond the
     * archive anyway.
     * </p>
     *
     * @throws IOException if reading the record of resetting the position in the archive fails
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        // default to resetting so the position is restored even if readRecord() throws
        boolean shouldReset = true;
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset) {
                // the record was not a second EOF record (or reading failed): undo the read
                archive.position(archive.position() - recordSize);
            }
        }
    }
  648. }