View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.tar;
20  
21  import java.io.ByteArrayOutputStream;
22  import java.io.Closeable;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.nio.ByteBuffer;
27  import java.nio.channels.SeekableByteChannel;
28  import java.nio.file.Files;
29  import java.nio.file.Path;
30  import java.util.ArrayList;
31  import java.util.Arrays;
32  import java.util.HashMap;
33  import java.util.LinkedList;
34  import java.util.List;
35  import java.util.Map;
36  
37  import org.apache.commons.compress.archivers.zip.ZipEncoding;
38  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
39  import org.apache.commons.compress.utils.ArchiveUtils;
40  import org.apache.commons.compress.utils.BoundedArchiveInputStream;
41  import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
42  import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
43  import org.apache.commons.io.input.BoundedInputStream;
44  
45  /**
46   * Provides random access to Unix archives.
47   *
48   * @since 1.21
49   */
50  public class TarFile implements Closeable {
51  
    /**
     * An input stream serving the data of exactly one entry, bounded to the entry's (real) size.
     * For sparse entries the data is stitched together from the per-entry list of zero-filled
     * and archive-backed streams stored in {@code sparseInputStreams}.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        // channel of the enclosing TarFile; repositioned on every non-sparse read
        private final SeekableByteChannel channel;

        // the entry whose data this stream exposes
        private final TarArchiveEntry entry;

        // logical offset into the (possibly sparse) entry data consumed so far
        private long entryOffset;

        // index into the entry's sparse input stream list; only used for sparse entries
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // reject entries that claim more data than the archive physically contains
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            // everything delivered already?
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // premature EOF: real size not reached yet but nothing left to read
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                buf.flip();
            }
            return totalRead;
        }

        // reads from the underlying channel at the given absolute archive position
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }

        /**
         * Reads up to {@code numToRead} bytes of sparse entry data starting at logical entry
         * offset {@code pos}, advancing through the chained sparse input streams as each one
         * is exhausted.
         */
        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is met, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough (readLen == numToRead), just return readLen
            return readLen;
        }
    }
147 
    /** Buffer size used when draining metadata entries such as long names. */
    private static final int SMALL_BUFFER_SIZE = 256;

    /** Reusable scratch buffer for reading metadata entries. */
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The channel the archive is read from. */
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    /** All entries read from the archive, in order of appearance. */
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    /** Block size used when consuming the padding after the end-of-archive marker. */
    private final int blockSize;

    /** Whether illegal header values are tolerated instead of causing an exception. */
    private final boolean lenient;

    /** Size of a single record (header or data chunk) in bytes. */
    private final int recordSize;

    /** Reusable buffer of exactly one record, refilled by readRecord(). */
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    /** Whether the end-of-archive marker has been reached. */
    private boolean eof;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    /** Sparse input streams keyed by entry name, built while parsing sparse entries. */
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
183 
    /**
     * Constructor for TarFile.
     *
     * <p>Uses the default block and record sizes, the default encoding, and non-lenient parsing.</p>
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content) throws IOException {
        this(new SeekableInMemoryByteChannel(content));
    }

    /**
     * Constructor for TarFile.
     *
     * <p>Uses the default block and record sizes and the default encoding.</p>
     *
     * @param content the content to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final boolean lenient) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * <p>Uses the default block and record sizes and non-lenient parsing.</p>
     *
     * @param content  the content to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final String encoding) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive) throws IOException {
        this(archive.toPath());
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final boolean lenient) throws IOException {
        this(archive.toPath(), lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive  the file of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final String encoding) throws IOException {
        this(archive.toPath(), encoding);
    }

    /**
     * Constructor for TarFile.
     *
     * <p>Opens a channel for the given path; the channel is closed by {@link #close()}.</p>
     *
     * @param archivePath the path of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
     *                    {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param encoding    the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final String encoding) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel content) throws IOException {
        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }
292 
293     /**
294      * Constructor for TarFile.
295      *
296      * @param archive    the seekable byte channel to use
297      * @param blockSize  the blocks size to use
298      * @param recordSize the record size to use
299      * @param encoding   the encoding to use
300      * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to
301      *                   {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead.
302      * @throws IOException when reading the tar archive fails
303      */
304     public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient)
305             throws IOException {
306         this.archive = archive;
307         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
308         this.recordSize = recordSize;
309         this.recordBuffer = ByteBuffer.allocate(this.recordSize);
310         this.blockSize = blockSize;
311         this.lenient = lenient;
312 
313         TarArchiveEntry entry;
314         while ((entry = getNextTarEntry()) != null) {
315             entries.add(entry);
316         }
317     }
318 
    /**
     * Updates the current entry with the read pax headers.
     *
     * @param headers       headers read from the pax header
     * @param sparseHeaders sparse headers read from the pax header
     * @throws IOException propagated from updating the entry from the headers
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }
329 
    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream is introduced by the sparse headers.
     *
     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the 0 size input streams because they are
     *           meaningless.
     */
    private void buildSparseInputStreams() throws IOException {
        final List<InputStream> streams = new ArrayList<>();
        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        // total size of all zero gaps seen so far; used to map logical offsets to physical ones
        long numberOfZeroBytesInSparseEntry = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }
            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                streams.add(BoundedInputStream.builder().setInputStream(zeroInputStream).setMaxCount(zeroBlockSize).get());
                numberOfZeroBytesInSparseEntry += zeroBlockSize;
            }
            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                // physical position of this data chunk inside the archive
                final long start = currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
                if (start + sparseHeader.getNumbytes() < start) {
                    // possible integer overflow
                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
                }
                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
            }
            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }
        sparseInputStreams.put(currEntry.getName(), streams);
    }
369 
    /**
     * Closes the underlying archive channel.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }
374 
375     /**
376      * This method is invoked once the end of the archive is hit, it tries to consume the remaining bytes under the assumption that the tool creating this
377      * archive has padded the last block.
378      */
379     private void consumeRemainderOfLastBlock() throws IOException {
380         final long bytesReadOfLastBlock = archive.position() % blockSize;
381         if (bytesReadOfLastBlock > 0) {
382             repositionForwardBy(blockSize - bytesReadOfLastBlock);
383         }
384     }
385 
386     /**
387      * Gets all TAR Archive Entries from the TarFile
388      *
389      * @return All entries from the tar file
390      */
391     public List<TarArchiveEntry> getEntries() {
392         return new ArrayList<>(entries);
393     }
394 
    /**
     * Gets the input stream for the provided Tar Archive Entry.
     *
     * @param entry Entry to get the input stream from
     * @return Input stream of the provided entry
     * @throws IOException Corrupted TAR archive. Can't read entry.
     */
    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
        try {
            return new BoundedTarEntryInputStream(entry, archive);
        } catch (final RuntimeException ex) {
            // malformed header values may surface as unchecked exceptions; report them as I/O errors
            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
        }
    }
409 
410     /**
411      * Gets the next entry in this tar archive as long name data.
412      *
413      * @return The next entry in the archive as long name data, or null.
414      * @throws IOException on error
415      */
416     private byte[] getLongNameData() throws IOException {
417         final ByteArrayOutputStream longName = new ByteArrayOutputStream();
418         int length;
419         try (InputStream in = getInputStream(currEntry)) {
420             while ((length = in.read(smallBuf)) >= 0) {
421                 longName.write(smallBuf, 0, length);
422             }
423         }
424         getNextTarEntry();
425         if (currEntry == null) {
426             // Bugzilla: 40334
427             // Malformed tar file - long entry name not followed by entry
428             return null;
429         }
430         byte[] longNameData = longName.toByteArray();
431         // remove trailing null terminator(s)
432         length = longNameData.length;
433         while (length > 0 && longNameData[length - 1] == 0) {
434             --length;
435         }
436         if (length != longNameData.length) {
437             longNameData = Arrays.copyOf(longNameData, length);
438         }
439         return longNameData;
440     }
441 
    /**
     * Gets the next entry in this tar archive. This will skip to the end of the current entry, if there is one, and place the position of the channel at the
     * header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
     * the archive, null will be returned to indicate that the end of the archive has been reached.
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            // Skip to the end of the entry
            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
            throwExceptionIfPositionIsNotInArchive();
            skipRecordPadding();
        }

        final ByteBuffer headerBuf = getRecord();
        if (null == headerBuf) {
            // Hit EOF
            currEntry = null;
            return null;
        }

        try {
            // position now points just past the header, i.e. at the entry's data
            final long position = archive.position();
            currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        // GNU 'K' entry: the real link name is stored as the entry's data
        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        // GNU 'L' entry: the real entry name is stored as the entry's data
        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                // no local PAX header, but earlier global headers still apply
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        return currEntry;
    }
524 
525     /**
526      * Gets the next record in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
527      * header of the next entry.
528      *
529      * <p>
530      * If there are no more entries in the archive, null will be returned to indicate that the end of the archive has been reached. At the same time the
531      * {@code hasHitEOF} marker will be set to true.
532      * </p>
533      *
534      * @return The next TarEntry in the archive, or null if there is no next entry.
535      * @throws IOException when reading the next TarEntry fails
536      */
537     private ByteBuffer getRecord() throws IOException {
538         ByteBuffer headerBuf = readRecord();
539         setAtEOF(isEOFRecord(headerBuf));
540         if (isAtEOF() && headerBuf != null) {
541             // Consume rest
542             tryToConsumeSecondEOFRecord();
543             consumeRemainderOfLastBlock();
544             headerBuf = null;
545         }
546         return headerBuf;
547     }
548 
    /**
     * Tests whether we are at the end-of-file, i.e. the end-of-archive marker has been read.
     *
     * @return whether we are at the end-of-file.
     */
    protected final boolean isAtEOF() {
        return eof;
    }
557 
    /**
     * Tests whether there is a current entry and it is a directory.
     *
     * @return true if the current entry exists and is a directory
     */
    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }
561 
    /**
     * Tests whether the given record marks the end of the archive.
     *
     * @param headerBuf the record to check; null is treated as EOF
     * @return true if the record is null or consists entirely of zero bytes
     */
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }
565 
    /**
     * <p>
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     *
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * <p>
     * For PAX Format 1.X: <br>
     * The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers delimited by newlines.
     * The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are map entries, each one
     * consisting of two numbers giving the offset and size of the data block it describes.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void paxHeaders() throws IOException {
        // sparse headers for PAX 0.0 are collected while parsing the key/value pairs
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers;
        try (InputStream input = getInputStream(currEntry)) {
            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
        }

        // for 0.1 PAX Headers
        if (headers.containsKey(TarGnuSparseKeys.MAP)) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get(TarGnuSparseKeys.MAP)));
        }
        getNextTarEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            try (InputStream input = getInputStream(currEntry)) {
                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
            }
            currEntry.setSparseHeaders(sparseHeaders);
            // data of the entry is after the pax gnu entry. So we need to update the data position once again
            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }
626 
    /**
     * Reads the global PAX headers from the current entry and advances to the following entry.
     *
     * @throws IOException if parsing fails or no entry follows the global PAX header
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders, currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }
637 
    /**
     * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry.
     *
     * @throws IOException when reading the sparse entry fails
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            // consume chained extension headers until one is not flagged as extended
            TarArchiveSparseEntry entry;
            do {
                final ByteBuffer headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf.array());
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
                // each extension header record pushes the entry's data one record further
                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }
661 
662     /**
663      * Reads a record from the input stream and return the data.
664      *
665      * @return The record data or null if EOF has been hit.
666      * @throws IOException if reading from the archive fails
667      */
668     private ByteBuffer readRecord() throws IOException {
669         recordBuffer.rewind();
670         final int readNow = archive.read(recordBuffer);
671         if (readNow != recordSize) {
672             return null;
673         }
674         return recordBuffer;
675     }
676 
    /**
     * Moves the archive position forward by the given number of bytes.
     *
     * @param offset number of bytes to skip; must not move the position backwards
     * @throws IOException if repositioning fails or would move backwards
     */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }
680 
681     private void repositionForwardTo(final long newPosition) throws IOException {
682         final long currPosition = archive.position();
683         if (newPosition < currPosition) {
684             throw new IOException("trying to move backwards inside of the archive");
685         }
686         archive.position(newPosition);
687     }
688 
    /**
     * Sets whether we are at end-of-file, i.e. the end-of-archive marker has been read.
     *
     * @param eof whether we are at end-of-file.
     */
    protected final void setAtEOF(final boolean eof) {
        this.eof = eof;
    }
697 
698     /**
699      * The last record block should be written at the full size, so skip any additional space used to fill a record after an entry
700      *
701      * @throws IOException when skipping the padding of the record fails
702      */
703     private void skipRecordPadding() throws IOException {
704         if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
705             final long numRecords = currEntry.getSize() / recordSize + 1;
706             final long padding = numRecords * recordSize - currEntry.getSize();
707             repositionForwardBy(padding);
708             throwExceptionIfPositionIsNotInArchive();
709         }
710     }
711 
712     /**
713      * Checks if the current position of the SeekableByteChannel is in the archive.
714      *
715      * @throws IOException If the position is not in the archive
716      */
717     private void throwExceptionIfPositionIsNotInArchive() throws IOException {
718         if (archive.size() < archive.position()) {
719             throw new IOException("Truncated TAR archive");
720         }
721     }
722 
    /**
     * Tries to read the next record resetting the position in the archive if it is not an EOF record.
     *
     * <p>
     * This is meant to protect against cases where a tar implementation has written only one EOF record when two are expected. Actually this won't help since a
     * non-conforming implementation likely won't fill full blocks consisting of - by default - ten records either so we probably have already read beyond the
     * archive anyway.
     * </p>
     *
     * @throws IOException if reading the record or resetting the position in the archive fails
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            // undo the speculative read when it was not an EOF record (or when reading threw)
            if (shouldReset) {
                archive.position(archive.position() - recordSize);
            }
        }
    }
743     }
744 }