001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.tar;
019
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
042
043/**
044 * The TarFile provides random access to UNIX archives.
045 * @since 1.21
046 */
047public class TarFile implements Closeable {
048
    /** Size of the scratch buffer used when draining long-name/long-link entry data. */
    private static final int SMALL_BUFFER_SIZE = 256;

    /** Reusable scratch buffer, see {@link #getLongNameData()}. */
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The channel the archive is read from; closed by {@link #close()}. */
    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    /** All entries collected by the eager scan performed in the constructor. */
    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    /** Block size; used to consume the padding that follows the EOF records. */
    private final int blockSize;

    /** When true, illegal header values are tolerated instead of raising errors. */
    private final boolean lenient;

    /** Record size; headers and entry data padding are record-aligned. */
    private final int recordSize;

    /** Reusable buffer of exactly one record, filled by readRecord(). */
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    /** True once the EOF record(s) have been seen. */
    private boolean hasHitEOF;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    /** Sparse input streams keyed by entry name, built only for sparse entries. */
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();
084
085    /**
086     * Constructor for TarFile.
087     *
088     * @param content the content to use
089     * @throws IOException when reading the tar archive fails
090     */
091    public TarFile(final byte[] content) throws IOException {
092        this(new SeekableInMemoryByteChannel(content));
093    }
094
095    /**
096     * Constructor for TarFile.
097     *
098     * @param content  the content to use
099     * @param encoding the encoding to use
100     * @throws IOException when reading the tar archive fails
101     */
102    public TarFile(final byte[] content, final String encoding) throws IOException {
103        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
104    }
105
106    /**
107     * Constructor for TarFile.
108     *
109     * @param content the content to use
110     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
111     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
112     *                exception instead.
113     * @throws IOException when reading the tar archive fails
114     */
115    public TarFile(final byte[] content, final boolean lenient) throws IOException {
116        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
117    }
118
119    /**
120     * Constructor for TarFile.
121     *
122     * @param archive the file of the archive to use
123     * @throws IOException when reading the tar archive fails
124     */
125    public TarFile(final File archive) throws IOException {
126        this(archive.toPath());
127    }
128
129    /**
130     * Constructor for TarFile.
131     *
132     * @param archive  the file of the archive to use
133     * @param encoding the encoding to use
134     * @throws IOException when reading the tar archive fails
135     */
136    public TarFile(final File archive, final String encoding) throws IOException {
137        this(archive.toPath(), encoding);
138    }
139
140    /**
141     * Constructor for TarFile.
142     *
143     * @param archive the file of the archive to use
144     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
145     *                ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
146     *                exception instead.
147     * @throws IOException when reading the tar archive fails
148     */
149    public TarFile(final File archive, final boolean lenient) throws IOException {
150        this(archive.toPath(), lenient);
151    }
152
153    /**
154     * Constructor for TarFile.
155     *
156     * @param archivePath the path of the archive to use
157     * @throws IOException when reading the tar archive fails
158     */
159    public TarFile(final Path archivePath) throws IOException {
160        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
161    }
162
163    /**
164     * Constructor for TarFile.
165     *
166     * @param archivePath the path of the archive to use
167     * @param encoding    the encoding to use
168     * @throws IOException when reading the tar archive fails
169     */
170    public TarFile(final Path archivePath, final String encoding) throws IOException {
171        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
172    }
173
174    /**
175     * Constructor for TarFile.
176     *
177     * @param archivePath the path of the archive to use
178     * @param lenient     when set to true illegal values for group/userid, mode, device numbers and timestamp will be
179     *                    ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
180     *                    exception instead.
181     * @throws IOException when reading the tar archive fails
182     */
183    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
184        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
185    }
186
187    /**
188     * Constructor for TarFile.
189     *
190     * @param content the content to use
191     * @throws IOException when reading the tar archive fails
192     */
193    public TarFile(final SeekableByteChannel content) throws IOException {
194        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
195    }
196
197    /**
198     * Constructor for TarFile.
199     *
200     * @param archive    the seekable byte channel to use
201     * @param blockSize  the blocks size to use
202     * @param recordSize the record size to use
203     * @param encoding   the encoding to use
204     * @param lenient    when set to true illegal values for group/userid, mode, device numbers and timestamp will be
205     *                   ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
206     *                   exception instead.
207     * @throws IOException when reading the tar archive fails
208     */
209    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException {
210        this.archive = archive;
211        this.hasHitEOF = false;
212        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
213        this.recordSize = recordSize;
214        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
215        this.blockSize = blockSize;
216        this.lenient = lenient;
217
218        TarArchiveEntry entry;
219        while ((entry = getNextTarEntry()) != null) {
220            entries.add(entry);
221        }
222    }
223
224    /**
225     * Get the next entry in this tar archive. This will skip
226     * to the end of the current entry, if there is one, and
227     * place the position of the channel at the header of the
228     * next entry, and read the header and instantiate a new
229     * TarEntry from the header bytes and return that entry.
230     * If there are no more entries in the archive, null will
231     * be returned to indicate that the end of the archive has
232     * been reached.
233     *
234     * @return The next TarEntry in the archive, or null if there is no next entry.
235     * @throws IOException when reading the next TarEntry fails
236     */
237    private TarArchiveEntry getNextTarEntry() throws IOException {
238        if (isAtEOF()) {
239            return null;
240        }
241
242        if (currEntry != null) {
243            // Skip to the end of the entry
244            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
245            throwExceptionIfPositionIsNotInArchive();
246            skipRecordPadding();
247        }
248
249        final ByteBuffer headerBuf = getRecord();
250        if (null == headerBuf) {
251            /* hit EOF */
252            currEntry = null;
253            return null;
254        }
255
256        try {
257            currEntry = new TarArchiveEntry(headerBuf.array(), zipEncoding, lenient, archive.position());
258        } catch (final IllegalArgumentException e) {
259            throw new IOException("Error detected parsing the header", e);
260        }
261
262        if (currEntry.isGNULongLinkEntry()) {
263            final byte[] longLinkData = getLongNameData();
264            if (longLinkData == null) {
265                // Bugzilla: 40334
266                // Malformed tar file - long link entry name not followed by
267                // entry
268                return null;
269            }
270            currEntry.setLinkName(zipEncoding.decode(longLinkData));
271        }
272
273        if (currEntry.isGNULongNameEntry()) {
274            final byte[] longNameData = getLongNameData();
275            if (longNameData == null) {
276                // Bugzilla: 40334
277                // Malformed tar file - long entry name not followed by
278                // entry
279                return null;
280            }
281
282            // COMPRESS-509 : the name of directories should end with '/'
283            final String name = zipEncoding.decode(longNameData);
284            currEntry.setName(name);
285            if (currEntry.isDirectory() && !name.endsWith("/")) {
286                currEntry.setName(name + "/");
287            }
288        }
289
290        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
291            readGlobalPaxHeaders();
292        }
293
294        try {
295            if (currEntry.isPaxHeader()) { // Process Pax headers
296                paxHeaders();
297            } else if (!globalPaxHeaders.isEmpty()) {
298                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
299            }
300        } catch (NumberFormatException e) {
301            throw new IOException("Error detected parsing the pax header", e);
302        }
303
304        if (currEntry.isOldGNUSparse()) { // Process sparse files
305            readOldGNUSparse();
306        }
307
308        return currEntry;
309    }
310
311    /**
312     * Adds the sparse chunks from the current entry to the sparse chunks,
313     * including any additional sparse entries following the current entry.
314     *
315     * @throws IOException when reading the sparse entry fails
316     */
317    private void readOldGNUSparse() throws IOException {
318        if (currEntry.isExtended()) {
319            TarArchiveSparseEntry entry;
320            do {
321                final ByteBuffer headerBuf = getRecord();
322                if (headerBuf == null) {
323                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
324                }
325                entry = new TarArchiveSparseEntry(headerBuf.array());
326                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
327                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
328            } while (entry.isExtended());
329        }
330
331        // sparse headers are all done reading, we need to build
332        // sparse input streams using these sparse headers
333        buildSparseInputStreams();
334    }
335
336    /**
337     * Build the input streams consisting of all-zero input streams and non-zero input streams.
338     * When reading from the non-zero input streams, the data is actually read from the original input stream.
339     * The size of each input stream is introduced by the sparse headers.
340     *
341     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
342     *        0 size input streams because they are meaningless.
343     */
344    private void buildSparseInputStreams() throws IOException {
345        final List<InputStream> streams = new ArrayList<>();
346
347        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
348
349        // Stream doesn't need to be closed at all as it doesn't use any resources
350        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
351        // logical offset into the extracted entry
352        long offset = 0;
353        long numberOfZeroBytesInSparseEntry = 0;
354        for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
355            final long zeroBlockSize = sparseHeader.getOffset() - offset;
356            if (zeroBlockSize < 0) {
357                // sparse header says to move backwards inside of the extracted entry
358                throw new IOException("Corrupted struct sparse detected");
359            }
360
361            // only store the zero block if it is not empty
362            if (zeroBlockSize > 0) {
363                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
364                numberOfZeroBytesInSparseEntry += zeroBlockSize;
365            }
366
367            // only store the input streams with non-zero size
368            if (sparseHeader.getNumbytes() > 0) {
369                final long start =
370                    currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
371                if (start + sparseHeader.getNumbytes() < start) {
372                    // possible integer overflow
373                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
374                }
375                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
376            }
377
378            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
379        }
380
381        sparseInputStreams.put(currEntry.getName(), streams);
382    }
383
384    /**
385     * Update the current entry with the read pax headers
386     * @param headers Headers read from the pax header
387     * @param sparseHeaders Sparse headers read from pax header
388     */
389    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
390        throws IOException {
391        currEntry.updateEntryFromPaxHeaders(headers);
392        currEntry.setSparseHeaders(sparseHeaders);
393    }
394
395    /**
396     * <p>
397     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
398     * may appear multi times, and they look like:
399     * <pre>
400     * GNU.sparse.size=size
401     * GNU.sparse.numblocks=numblocks
402     * repeat numblocks times
403     *   GNU.sparse.offset=offset
404     *   GNU.sparse.numbytes=numbytes
405     * end repeat
406     * </pre>
407     *
408     * <p>
409     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
410     * <pre>
411     * GNU.sparse.map
412     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
413     * </pre>
414     *
415     * <p>
416     * For PAX Format 1.X:
417     * <br>
418     * The sparse map itself is stored in the file data block, preceding the actual file data.
419     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
420     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
421     * giving the offset and size of the data block it describes.
422     * @throws IOException
423     */
424    private void paxHeaders() throws IOException {
425        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
426        final Map<String, String> headers;
427        try (final InputStream input = getInputStream(currEntry)) {
428            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
429        }
430
431        // for 0.1 PAX Headers
432        if (headers.containsKey("GNU.sparse.map")) {
433            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
434        }
435        getNextTarEntry(); // Get the actual file entry
436        if (currEntry == null) {
437            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
438        }
439        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
440
441        // for 1.0 PAX Format, the sparse map is stored in the file data block
442        if (currEntry.isPaxGNU1XSparse()) {
443            try (final InputStream input = getInputStream(currEntry)) {
444                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
445            }
446            currEntry.setSparseHeaders(sparseHeaders);
447            // data of the entry is after the pax gnu entry. So we need to update the data position once again
448            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
449        }
450
451        // sparse headers are all done reading, we need to build
452        // sparse input streams using these sparse headers
453        buildSparseInputStreams();
454    }
455
    /**
     * Reads a global PAX header entry into {@link #globalPaxHeaders} and
     * advances to the entry that follows it.
     *
     * @throws IOException if parsing fails or no entry follows the global header
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders,
                currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }
467
468    /**
469     * Get the next entry in this tar archive as longname data.
470     *
471     * @return The next entry in the archive as longname data, or null.
472     * @throws IOException on error
473     */
474    private byte[] getLongNameData() throws IOException {
475        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
476        int length;
477        try (final InputStream in = getInputStream(currEntry)) {
478            while ((length = in.read(smallBuf)) >= 0) {
479                longName.write(smallBuf, 0, length);
480            }
481        }
482        getNextTarEntry();
483        if (currEntry == null) {
484            // Bugzilla: 40334
485            // Malformed tar file - long entry name not followed by entry
486            return null;
487        }
488        byte[] longNameData = longName.toByteArray();
489        // remove trailing null terminator(s)
490        length = longNameData.length;
491        while (length > 0 && longNameData[length - 1] == 0) {
492            --length;
493        }
494        if (length != longNameData.length) {
495            final byte[] l = new byte[length];
496            System.arraycopy(longNameData, 0, l, 0, length);
497            longNameData = l;
498        }
499        return longNameData;
500    }
501
502    /**
503     * The last record block should be written at the full size, so skip any
504     * additional space used to fill a record after an entry
505     *
506     * @throws IOException when skipping the padding of the record fails
507     */
508    private void skipRecordPadding() throws IOException {
509        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
510            final long numRecords = (currEntry.getSize() / recordSize) + 1;
511            final long padding = (numRecords * recordSize) - currEntry.getSize();
512            repositionForwardBy(padding);
513            throwExceptionIfPositionIsNotInArchive();
514        }
515    }
516
517    private void repositionForwardTo(final long newPosition) throws IOException {
518        final long currPosition = archive.position();
519        if (newPosition < currPosition) {
520            throw new IOException("trying to move backwards inside of the archive");
521        }
522        archive.position(newPosition);
523    }
524
    /**
     * Moves the channel forward by the given number of bytes; a negative
     * offset results in an IOException from repositionForwardTo.
     *
     * @param offset number of bytes to skip
     * @throws IOException if the resulting move would go backwards
     */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }
528
529    /**
530     * Checks if the current position of the SeekableByteChannel is in the archive.
531     * @throws IOException If the position is not in the archive
532     */
533    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
534        if (archive.size() < archive.position()) {
535            throw new IOException("Truncated TAR archive");
536        }
537    }
538
539    /**
540     * Get the next record in this tar archive. This will skip
541     * over any remaining data in the current entry, if there
542     * is one, and place the input stream at the header of the
543     * next entry.
544     *
545     * <p>If there are no more entries in the archive, null will be
546     * returned to indicate that the end of the archive has been
547     * reached.  At the same time the {@code hasHitEOF} marker will be
548     * set to true.</p>
549     *
550     * @return The next TarEntry in the archive, or null if there is no next entry.
551     * @throws IOException when reading the next TarEntry fails
552     */
553    private ByteBuffer getRecord() throws IOException {
554        ByteBuffer headerBuf = readRecord();
555        setAtEOF(isEOFRecord(headerBuf));
556        if (isAtEOF() && headerBuf != null) {
557            // Consume rest
558            tryToConsumeSecondEOFRecord();
559            consumeRemainderOfLastBlock();
560            headerBuf = null;
561        }
562        return headerBuf;
563    }
564
565    /**
566     * Tries to read the next record resetting the position in the
567     * archive if it is not a EOF record.
568     *
569     * <p>This is meant to protect against cases where a tar
570     * implementation has written only one EOF record when two are
571     * expected. Actually this won't help since a non-conforming
572     * implementation likely won't fill full blocks consisting of - by
573     * default - ten records either so we probably have already read
574     * beyond the archive anyway.</p>
575     *
576     * @throws IOException if reading the record of resetting the position in the archive fails
577     */
578    private void tryToConsumeSecondEOFRecord() throws IOException {
579        boolean shouldReset = true;
580        try {
581            shouldReset = !isEOFRecord(readRecord());
582        } finally {
583            if (shouldReset) {
584                archive.position(archive.position() - recordSize);
585            }
586        }
587    }
588
589    /**
590     * This method is invoked once the end of the archive is hit, it
591     * tries to consume the remaining bytes under the assumption that
592     * the tool creating this archive has padded the last block.
593     */
594    private void consumeRemainderOfLastBlock() throws IOException {
595        final long bytesReadOfLastBlock = archive.position() % blockSize;
596        if (bytesReadOfLastBlock > 0) {
597            repositionForwardBy(blockSize - bytesReadOfLastBlock);
598        }
599    }
600
601    /**
602     * Read a record from the input stream and return the data.
603     *
604     * @return The record data or null if EOF has been hit.
605     * @throws IOException if reading from the archive fails
606     */
607    private ByteBuffer readRecord() throws IOException {
608        recordBuffer.rewind();
609        final int readNow = archive.read(recordBuffer);
610        if (readNow != recordSize) {
611            return null;
612        }
613        return recordBuffer;
614    }
615
616    /**
617     * Get all TAR Archive Entries from the TarFile
618     *
619     * @return All entries from the tar file
620     */
621    public List<TarArchiveEntry> getEntries() {
622        return new ArrayList<>(entries);
623    }
624
625    private boolean isEOFRecord(final ByteBuffer headerBuf) {
626        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
627    }
628
    /**
     * Whether the end of the archive has been reached.
     *
     * @return true once the EOF record(s) have been seen
     */
    protected final boolean isAtEOF() {
        return hasHitEOF;
    }
632
    /**
     * Records whether the end of the archive has been reached.
     *
     * @param b the new EOF state
     */
    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }
636
637    private boolean isDirectory() {
638        return currEntry != null && currEntry.isDirectory();
639    }
640
641    /**
642     * Gets the input stream for the provided Tar Archive Entry.
643     * @param entry Entry to get the input stream from
644     * @return Input stream of the provided entry
645     * @throws IOException Corrupted TAR archive. Can't read entry.
646     */
647    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
648        try {
649            return new BoundedTarEntryInputStream(entry, archive);
650        } catch (RuntimeException ex) {
651            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
652        }
653    }
654
    /**
     * Closes the underlying channel of the archive.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }
659
    /**
     * Input stream over the data of a single entry, bounded to the entry's
     * real (extracted) size. Sparse entries are served from the stream list
     * built by buildSparseInputStreams(); regular entries read straight from
     * the archive channel.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        /** Channel of the surrounding TarFile. */
        private final SeekableByteChannel channel;

        /** The entry whose data is exposed by this stream. */
        private final TarArchiveEntry entry;

        /** Logical offset into the (extracted) entry data read so far. */
        private long entryOffset;

        /** Index into the entry's sparse input stream list. */
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // reject entries whose claimed data extends past the channel's end
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            // everything has been delivered already
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // a non-empty request hitting EOF before realSize means the
                // archive is shorter than the header promised
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                buf.flip();
            }
            return totalRead;
        }

        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is meet, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
            return readLen;
        }

        /** Positions the channel at {@code pos} and reads into {@code buf}. */
        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }
    }
755}