001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019/*
020 * This package is based on the work done by Timothy Gerard Endres
021 * (time@ice.com) to whom the Ant project is very grateful for his great code.
022 */
023
024package org.apache.commons.compress.archivers.tar;
025
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;
042
043/**
044 * The TarInputStream reads a UNIX tar archive as an InputStream.
045 * methods are provided to position at each successive entry in
046 * the archive, and the read each entry as a normal input stream
047 * using read().
048 * @NotThreadSafe
049 */
050public class TarArchiveInputStream extends ArchiveInputStream {
051
    /** Size of the scratch buffer used while draining entry data (e.g. GNU long name entries). */
    private static final int SMALL_BUFFER_SIZE = 256;

    /** Scratch buffer reused by {@link #getLongNameData()}. */
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size of a TAR header record in bytes. */
    private final int recordSize;

    /** The buffer a single TAR header record is read into. */
    private final byte[] recordBuffer;

    /** The size of a block in bytes; the remainder of the last block is consumed at end of archive. */
    private final int blockSize;

    /** True once the end-of-archive marker has been seen. */
    private boolean hasHitEOF;

    /** Size of the current entry as reported by its header (possibly updated by PAX headers). */
    private long entrySize;

    /** Number of bytes of the current entry already read or skipped. */
    private long entryOffset;

    /** The raw input stream the archive is read from. */
    private final InputStream inputStream;

    /** Input streams used for reading sparse entries. **/
    private List<InputStream> sparseInputStreams;

    /** Index into {@code sparseInputStreams} of the stream currently being read. */
    private int currentSparseInputStreamIndex;

    /** The meta-data of the current entry; null before the first and after the last entry. */
    private TarArchiveEntry currEntry;

    /** The encoding used to decode file names from header bytes. */
    private final ZipEncoding zipEncoding;

    // the provided encoding name (kept accessible for unit tests)
    final String encoding;

    // the global PAX headers; applied to every subsequent entry that has no PAX header of its own
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    // when true, illegal numeric header values become TarArchiveEntry.UNKNOWN instead of throwing
    private final boolean lenient;
099
100    /**
101     * Constructor for TarInputStream.
102     * @param is the input stream to use
103     */
104    public TarArchiveInputStream(final InputStream is) {
105        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
106    }
107
108    /**
109     * Constructor for TarInputStream.
110     * @param is the input stream to use
111     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
112     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
113     * exception instead.
114     * @since 1.19
115     */
116    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
117        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
118    }
119
120    /**
121     * Constructor for TarInputStream.
122     * @param is the input stream to use
123     * @param encoding name of the encoding to use for file names
124     * @since 1.4
125     */
126    public TarArchiveInputStream(final InputStream is, final String encoding) {
127        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
128             encoding);
129    }
130
131    /**
132     * Constructor for TarInputStream.
133     * @param is the input stream to use
134     * @param blockSize the block size to use
135     */
136    public TarArchiveInputStream(final InputStream is, final int blockSize) {
137        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
138    }
139
140    /**
141     * Constructor for TarInputStream.
142     * @param is the input stream to use
143     * @param blockSize the block size to use
144     * @param encoding name of the encoding to use for file names
145     * @since 1.4
146     */
147    public TarArchiveInputStream(final InputStream is, final int blockSize,
148                                 final String encoding) {
149        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
150    }
151
152    /**
153     * Constructor for TarInputStream.
154     * @param is the input stream to use
155     * @param blockSize the block size to use
156     * @param recordSize the record size to use
157     */
158    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
159        this(is, blockSize, recordSize, null);
160    }
161
162    /**
163     * Constructor for TarInputStream.
164     * @param is the input stream to use
165     * @param blockSize the block size to use
166     * @param recordSize the record size to use
167     * @param encoding name of the encoding to use for file names
168     * @since 1.4
169     */
170    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
171                                 final String encoding) {
172        this(is, blockSize, recordSize, encoding, false);
173    }
174
175    /**
176     * Constructor for TarInputStream.
177     * @param is the input stream to use
178     * @param blockSize the block size to use
179     * @param recordSize the record size to use
180     * @param encoding name of the encoding to use for file names
181     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
182     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
183     * exception instead.
184     * @since 1.19
185     */
186    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
187                                 final String encoding, final boolean lenient) {
188        this.inputStream = is;
189        this.hasHitEOF = false;
190        this.encoding = encoding;
191        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
192        this.recordSize = recordSize;
193        this.recordBuffer = new byte[recordSize];
194        this.blockSize = blockSize;
195        this.lenient = lenient;
196    }
197
198    /**
199     * Closes this stream. Calls the TarBuffer's close() method.
200     * @throws IOException on error
201     */
202    @Override
203    public void close() throws IOException {
204        // Close all the input streams in sparseInputStreams
205        if(sparseInputStreams != null) {
206            for (final InputStream inputStream : sparseInputStreams) {
207                inputStream.close();
208            }
209        }
210
211        inputStream.close();
212    }
213
214    /**
215     * Get the record size being used by this stream's buffer.
216     *
217     * @return The TarBuffer record size.
218     */
219    public int getRecordSize() {
220        return recordSize;
221    }
222
223    /**
224     * Get the available data that can be read from the current
225     * entry in the archive. This does not indicate how much data
226     * is left in the entire archive, only in the current entry.
227     * This value is determined from the entry's size header field
228     * and the amount of data already read from the current entry.
229     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
230     * bytes are left in the current entry in the archive.
231     *
232     * @return The number of available bytes for the current entry.
233     * @throws IOException for signature
234     */
235    @Override
236    public int available() throws IOException {
237        if (isDirectory()) {
238            return 0;
239        }
240
241        if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
242            return Integer.MAX_VALUE;
243        }
244        return (int) (currEntry.getRealSize() - entryOffset);
245    }
246
247
248    /**
249     * Skips over and discards <code>n</code> bytes of data from this input
250     * stream. The <code>skip</code> method may, for a variety of reasons, end
251     * up skipping over some smaller number of bytes, possibly <code>0</code>.
252     * This may result from any of a number of conditions; reaching end of file
253     * or end of entry before <code>n</code> bytes have been skipped; are only
254     * two possibilities. The actual number of bytes skipped is returned. If
255     * <code>n</code> is negative, no bytes are skipped.
256     *
257     *
258     * @param n
259     *            the number of bytes to be skipped.
260     * @return the actual number of bytes skipped.
261     * @throws IOException if a truncated tar archive is detected
262     *                     or some other I/O error occurs
263     */
264    @Override
265    public long skip(final long n) throws IOException {
266        if (n <= 0 || isDirectory()) {
267            return 0;
268        }
269
270        final long availableOfInputStream = inputStream.available();
271        final long available = currEntry.getRealSize() - entryOffset;
272        final long numToSkip = Math.min(n, available);
273        long skipped;
274
275        if (!currEntry.isSparse()) {
276            skipped = IOUtils.skip(inputStream, numToSkip);
277            // for non-sparse entry, we should get the bytes actually skipped bytes along with
278            // inputStream.available() if inputStream is instance of FileInputStream
279            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
280        } else {
281            skipped = skipSparse(numToSkip);
282        }
283
284
285        count(skipped);
286        entryOffset += skipped;
287        return skipped;
288    }
289
290    /**
291     * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip,
292     * jump to the next input stream and skip the rest bytes, keep doing this until total n bytes are skipped
293     * or the input streams are all skipped
294     *
295     * @param n bytes of data to skip
296     * @return actual bytes of data skipped
297     * @throws IOException
298     */
299    private long skipSparse(final long n) throws IOException {
300        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
301            return inputStream.skip(n);
302        }
303
304        long bytesSkipped = 0;
305
306        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
307            final InputStream  currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
308            bytesSkipped += currentInputStream.skip(n - bytesSkipped);
309
310            if (bytesSkipped < n) {
311                currentSparseInputStreamIndex++;
312            }
313        }
314
315        return bytesSkipped;
316    }
317
318    /**
319     * Since we do not support marking just yet, we return false.
320     *
321     * @return False.
322     */
323    @Override
324    public boolean markSupported() {
325        return false;
326    }
327
328    /**
329     * Since we do not support marking just yet, we do nothing.
330     *
331     * @param markLimit The limit to mark.
332     */
333    @Override
334    public synchronized void mark(final int markLimit) {
335    }
336
337    /**
338     * Since we do not support marking just yet, we do nothing.
339     */
340    @Override
341    public synchronized void reset() {
342    }
343
344    /**
345     * Get the next entry in this tar archive. This will skip
346     * over any remaining data in the current entry, if there
347     * is one, and place the input stream at the header of the
348     * next entry, and read the header and instantiate a new
349     * TarEntry from the header bytes and return that entry.
350     * If there are no more entries in the archive, null will
351     * be returned to indicate that the end of the archive has
352     * been reached.
353     *
354     * @return The next TarEntry in the archive, or null.
355     * @throws IOException on error
356     */
357    public TarArchiveEntry getNextTarEntry() throws IOException {
358        if (isAtEOF()) {
359            return null;
360        }
361
362        if (currEntry != null) {
363            /* Skip will only go to the end of the current entry */
364            IOUtils.skip(this, Long.MAX_VALUE);
365
366            /* skip to the end of the last record */
367            skipRecordPadding();
368        }
369
370        final byte[] headerBuf = getRecord();
371
372        if (headerBuf == null) {
373            /* hit EOF */
374            currEntry = null;
375            return null;
376        }
377
378        try {
379            currEntry = new TarArchiveEntry(headerBuf, zipEncoding, lenient);
380        } catch (final IllegalArgumentException e) {
381            throw new IOException("Error detected parsing the header", e);
382        }
383
384        entryOffset = 0;
385        entrySize = currEntry.getSize();
386
387        if (currEntry.isGNULongLinkEntry()) {
388            final byte[] longLinkData = getLongNameData();
389            if (longLinkData == null) {
390                // Bugzilla: 40334
391                // Malformed tar file - long link entry name not followed by
392                // entry
393                return null;
394            }
395            currEntry.setLinkName(zipEncoding.decode(longLinkData));
396        }
397
398        if (currEntry.isGNULongNameEntry()) {
399            final byte[] longNameData = getLongNameData();
400            if (longNameData == null) {
401                // Bugzilla: 40334
402                // Malformed tar file - long entry name not followed by
403                // entry
404                return null;
405            }
406
407            // COMPRESS-509 : the name of directories should end with '/'
408            final String name = zipEncoding.decode(longNameData);
409            currEntry.setName(name);
410            if (currEntry.isDirectory() && !name.endsWith("/")) {
411                currEntry.setName(name + "/");
412            }
413        }
414
415        if (currEntry.isGlobalPaxHeader()){ // Process Global Pax headers
416            readGlobalPaxHeaders();
417        }
418
419        try {
420            if (currEntry.isPaxHeader()){ // Process Pax headers
421                paxHeaders();
422            } else if (!globalPaxHeaders.isEmpty()) {
423                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
424            }
425        } catch (final NumberFormatException e) {
426            throw new IOException("Error detected parsing the pax header", e);
427        }
428
429        if (currEntry.isOldGNUSparse()){ // Process sparse files
430            readOldGNUSparse();
431        }
432
433        // If the size of the next element in the archive has changed
434        // due to a new size being reported in the posix header
435        // information, we update entrySize here so that it contains
436        // the correct value.
437        entrySize = currEntry.getSize();
438
439        return currEntry;
440    }
441
442    /**
443     * The last record block should be written at the full size, so skip any
444     * additional space used to fill a record after an entry.
445     *
446     * @throws IOException if a truncated tar archive is detected
447     */
448    private void skipRecordPadding() throws IOException {
449        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
450            final long available = inputStream.available();
451            final long numRecords = (this.entrySize / this.recordSize) + 1;
452            final long padding = (numRecords * this.recordSize) - this.entrySize;
453            long skipped = IOUtils.skip(inputStream, padding);
454
455            skipped = getActuallySkipped(available, skipped, padding);
456
457            count(skipped);
458        }
459    }
460
461    /**
462     * For FileInputStream, the skip always return the number you input, so we
463     * need the available bytes to determine how many bytes are actually skipped
464     *
465     * @param available available bytes returned by inputStream.available()
466     * @param skipped   skipped bytes returned by inputStream.skip()
467     * @param expected  bytes expected to skip
468     * @return number of bytes actually skipped
469     * @throws IOException if a truncated tar archive is detected
470     */
471    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
472        long actuallySkipped = skipped;
473        if (inputStream instanceof FileInputStream) {
474            actuallySkipped = Math.min(skipped, available);
475        }
476
477        if (actuallySkipped != expected) {
478            throw new IOException("Truncated TAR archive");
479        }
480
481        return actuallySkipped;
482    }
483
484    /**
485     * Get the next entry in this tar archive as longname data.
486     *
487     * @return The next entry in the archive as longname data, or null.
488     * @throws IOException on error
489     */
490    protected byte[] getLongNameData() throws IOException {
491        // read in the name
492        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
493        int length = 0;
494        while ((length = read(smallBuf)) >= 0) {
495            longName.write(smallBuf, 0, length);
496        }
497        getNextEntry();
498        if (currEntry == null) {
499            // Bugzilla: 40334
500            // Malformed tar file - long entry name not followed by entry
501            return null;
502        }
503        byte[] longNameData = longName.toByteArray();
504        // remove trailing null terminator(s)
505        length = longNameData.length;
506        while (length > 0 && longNameData[length - 1] == 0) {
507            --length;
508        }
509        if (length != longNameData.length) {
510            final byte[] l = new byte[length];
511            System.arraycopy(longNameData, 0, l, 0, length);
512            longNameData = l;
513        }
514        return longNameData;
515    }
516
517    /**
518     * Get the next record in this tar archive. This will skip
519     * over any remaining data in the current entry, if there
520     * is one, and place the input stream at the header of the
521     * next entry.
522     *
523     * <p>If there are no more entries in the archive, null will be
524     * returned to indicate that the end of the archive has been
525     * reached.  At the same time the {@code hasHitEOF} marker will be
526     * set to true.</p>
527     *
528     * @return The next header in the archive, or null.
529     * @throws IOException on error
530     */
531    private byte[] getRecord() throws IOException {
532        byte[] headerBuf = readRecord();
533        setAtEOF(isEOFRecord(headerBuf));
534        if (isAtEOF() && headerBuf != null) {
535            tryToConsumeSecondEOFRecord();
536            consumeRemainderOfLastBlock();
537            headerBuf = null;
538        }
539        return headerBuf;
540    }
541
542    /**
543     * Determine if an archive record indicate End of Archive. End of
544     * archive is indicated by a record that consists entirely of null bytes.
545     *
546     * @param record The record data to check.
547     * @return true if the record data is an End of Archive
548     */
549    protected boolean isEOFRecord(final byte[] record) {
550        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
551    }
552
553    /**
554     * Read a record from the input stream and return the data.
555     *
556     * @return The record data or null if EOF has been hit.
557     * @throws IOException on error
558     */
559    protected byte[] readRecord() throws IOException {
560        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
561        count(readNow);
562        if (readNow != recordSize) {
563            return null;
564        }
565
566        return recordBuffer;
567    }
568
    /**
     * Parses a global PAX header entry (and any global sparse headers it
     * carries) and advances to the entry that follows it.
     *
     * @throws IOException if no entry follows the global PAX header
     */
    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }
577
578    /**
579     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
580     * may appear multi times, and they look like:
581     *
582     * GNU.sparse.size=size
583     * GNU.sparse.numblocks=numblocks
584     * repeat numblocks times
585     *   GNU.sparse.offset=offset
586     *   GNU.sparse.numbytes=numbytes
587     * end repeat
588     *
589     *
590     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
591     *
592     * GNU.sparse.map
593     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
594     *
595     *
596     * For PAX Format 1.X:
597     * The sparse map itself is stored in the file data block, preceding the actual file data.
598     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
599     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
600     * giving the offset and size of the data block it describes.
601     * @throws IOException
602     */
603    private void paxHeaders() throws IOException {
604        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
605        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);
606
607        // for 0.1 PAX Headers
608        if (headers.containsKey("GNU.sparse.map")) {
609            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
610        }
611        getNextEntry(); // Get the actual file entry
612        if (currEntry == null) {
613            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
614        }
615        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);
616
617        // for 1.0 PAX Format, the sparse map is stored in the file data block
618        if (currEntry.isPaxGNU1XSparse()) {
619            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
620            currEntry.setSparseHeaders(sparseHeaders);
621        }
622
623        // sparse headers are all done reading, we need to build
624        // sparse input streams using these sparse headers
625        buildSparseInputStreams();
626    }
627
    /**
     * Applies PAX headers and sparse headers to the current entry.
     *
     * @param headers the PAX headers to apply
     * @param sparseHeaders the sparse headers to attach
     * @throws IOException on error while updating the entry
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }
633
634    /**
635     * Adds the sparse chunks from the current entry to the sparse chunks,
636     * including any additional sparse entries following the current entry.
637     *
638     * @throws IOException on error
639     */
640    private void readOldGNUSparse() throws IOException {
641        if (currEntry.isExtended()) {
642            TarArchiveSparseEntry entry;
643            do {
644                final byte[] headerBuf = getRecord();
645                if (headerBuf == null) {
646                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
647                }
648                entry = new TarArchiveSparseEntry(headerBuf);
649                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
650            } while (entry.isExtended());
651        }
652
653        // sparse headers are all done reading, we need to build
654        // sparse input streams using these sparse headers
655        buildSparseInputStreams();
656    }
657
658    private boolean isDirectory() {
659        return currEntry != null && currEntry.isDirectory();
660    }
661
662    /**
663     * Returns the next Archive Entry in this Stream.
664     *
665     * @return the next entry,
666     *         or {@code null} if there are no more entries
667     * @throws IOException if the next entry could not be read
668     */
669    @Override
670    public ArchiveEntry getNextEntry() throws IOException {
671        return getNextTarEntry();
672    }
673
674    /**
675     * Tries to read the next record rewinding the stream if it is not a EOF record.
676     *
677     * <p>This is meant to protect against cases where a tar
678     * implementation has written only one EOF record when two are
679     * expected.  Actually this won't help since a non-conforming
680     * implementation likely won't fill full blocks consisting of - by
681     * default - ten records either so we probably have already read
682     * beyond the archive anyway.</p>
683     */
684    private void tryToConsumeSecondEOFRecord() throws IOException {
685        boolean shouldReset = true;
686        final boolean marked = inputStream.markSupported();
687        if (marked) {
688            inputStream.mark(recordSize);
689        }
690        try {
691            shouldReset = !isEOFRecord(readRecord());
692        } finally {
693            if (shouldReset && marked) {
694                pushedBackBytes(recordSize);
695                inputStream.reset();
696            }
697        }
698    }
699
700    /**
701     * Reads bytes from the current tar archive entry.
702     *
703     * This method is aware of the boundaries of the current
704     * entry in the archive and will deal with them as if they
705     * were this stream's start and EOF.
706     *
707     * @param buf The buffer into which to place bytes read.
708     * @param offset The offset at which to place bytes read.
709     * @param numToRead The number of bytes to read.
710     * @return The number of bytes read, or -1 at EOF.
711     * @throws IOException on error
712     */
713    @Override
714    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
715        if (numToRead == 0) {
716            return 0;
717        }
718        int totalRead = 0;
719
720        if (isAtEOF() || isDirectory()) {
721            return -1;
722        }
723
724        if (currEntry == null) {
725            throw new IllegalStateException("No current tar entry");
726        }
727
728        if (entryOffset >= currEntry.getRealSize()) {
729            return -1;
730        }
731
732        numToRead = Math.min(numToRead, available());
733
734        if (currEntry.isSparse()) {
735            // for sparse entries, we need to read them in another way
736            totalRead = readSparse(buf, offset, numToRead);
737        } else {
738            totalRead = inputStream.read(buf, offset, numToRead);
739        }
740
741        if (totalRead == -1) {
742            if (numToRead > 0) {
743                throw new IOException("Truncated TAR archive");
744            }
745            setAtEOF(true);
746        } else {
747            count(totalRead);
748            entryOffset += totalRead;
749        }
750
751        return totalRead;
752    }
753
754    /**
755     * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is
756     * stored in tar files, and they are stored separately. The structure of non-zero data is introduced by the
757     * sparse headers using the offset, where a block of non-zero data starts, and numbytes, the length of the
758     * non-zero data block.
759     * When reading sparse entries, the actual data is read out with "holes" and non-zero data combined together
760     * according to the sparse headers.
761     *
762     * @param buf The buffer into which to place bytes read.
763     * @param offset The offset at which to place bytes read.
764     * @param numToRead The number of bytes to read.
765     * @return The number of bytes read, or -1 at EOF.
766     * @throws IOException on error
767     */
768    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
769        // if there are no actual input streams, just read from the original input stream
770        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
771            return inputStream.read(buf, offset, numToRead);
772        }
773
774        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
775            return -1;
776        }
777
778        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
779        final int readLen = currentInputStream.read(buf, offset, numToRead);
780
781        // if the current input stream is the last input stream,
782        // just return the number of bytes read from current input stream
783        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
784            return readLen;
785        }
786
787        // if EOF of current input stream is meet, open a new input stream and recursively call read
788        if (readLen == -1) {
789            currentSparseInputStreamIndex++;
790            return readSparse(buf, offset, numToRead);
791        }
792
793        // if the rest data of current input stream is not long enough, open a new input stream
794        // and recursively call read
795        if (readLen < numToRead) {
796            currentSparseInputStreamIndex++;
797            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
798            if (readLenOfNext == -1) {
799                return readLen;
800            }
801
802            return readLen + readLenOfNext;
803        }
804
805        // if the rest data of current input stream is enough(which means readLen == len), just return readLen
806        return readLen;
807    }
808
809    /**
810     * Whether this class is able to read the given entry.
811     *
812     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
813     */
814    @Override
815    public boolean canReadEntryData(final ArchiveEntry ae) {
816        return ae instanceof TarArchiveEntry;
817    }
818
819    /**
820     * Get the current TAR Archive Entry that this input stream is processing
821     *
822     * @return The current Archive Entry
823     */
824    public TarArchiveEntry getCurrentEntry() {
825        return currEntry;
826    }
827
828    protected final void setCurrentEntry(final TarArchiveEntry e) {
829        currEntry = e;
830    }
831
832    protected final boolean isAtEOF() {
833        return hasHitEOF;
834    }
835
836    protected final void setAtEOF(final boolean b) {
837        hasHitEOF = b;
838    }
839
840    /**
841     * This method is invoked once the end of the archive is hit, it
842     * tries to consume the remaining bytes under the assumption that
843     * the tool creating this archive has padded the last block.
844     */
845    private void consumeRemainderOfLastBlock() throws IOException {
846        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
847        if (bytesReadOfLastBlock > 0) {
848            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
849            count(skipped);
850        }
851    }
852
853    /**
854     * Checks if the signature matches what is expected for a tar file.
855     *
856     * @param signature
857     *            the bytes to check
858     * @param length
859     *            the number of bytes to check
860     * @return true, if this stream is a tar archive stream, false otherwise
861     */
862    public static boolean matches(final byte[] signature, final int length) {
863        if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
864            return false;
865        }
866
867        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
868                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
869            &&
870            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
871                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
872                ){
873            return true;
874        }
875        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
876                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
877            &&
878            (
879             ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
880                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
881            ||
882            ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
883                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
884            )
885                ){
886            return true;
887        }
888        // COMPRESS-107 - recognise Ant tar files
889        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
890                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
891                &&
892                ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
893                        signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
894    }
895
896    /**
897     * Build the input streams consisting of all-zero input streams and non-zero input streams.
898     * When reading from the non-zero input streams, the data is actually read from the original input stream.
899     * The size of each input stream is introduced by the sparse headers.
900     *
901     * NOTE : Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
902     *        0 size input streams because they are meaningless.
903     */
904    private void buildSparseInputStreams() throws IOException {
905        currentSparseInputStreamIndex = -1;
906        sparseInputStreams = new ArrayList<>();
907
908        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
909
910        // Stream doesn't need to be closed at all as it doesn't use any resources
911        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
912        // logical offset into the extracted entry
913        long offset = 0;
914        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
915            final long zeroBlockSize = sparseHeader.getOffset() - offset;
916            if (zeroBlockSize < 0) {
917                // sparse header says to move backwards inside of the extracted entry
918                throw new IOException("Corrupted struct sparse detected");
919            }
920
921            // only store the zero block if it is not empty
922            if (zeroBlockSize > 0) {
923                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
924            }
925
926            // only store the input streams with non-zero size
927            if (sparseHeader.getNumbytes() > 0) {
928                sparseInputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
929            }
930
931            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
932        }
933
934        if (!sparseInputStreams.isEmpty()) {
935            currentSparseInputStreamIndex = 0;
936        }
937    }
938}