001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.UncheckedIOException;
024import java.math.BigInteger;
025import java.nio.ByteBuffer;
026import java.nio.charset.Charset;
027import java.nio.charset.StandardCharsets;
028import java.util.ArrayList;
029import java.util.Collections;
030import java.util.HashMap;
031import java.util.List;
032import java.util.Map;
033
034import org.apache.commons.compress.archivers.zip.ZipEncoding;
035import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
036import org.apache.commons.compress.utils.IOUtils;
037import org.apache.commons.compress.utils.ParsingUtils;
038import org.apache.commons.io.output.ByteArrayOutputStream;
039
040/**
041 * This class provides static utility methods to work with byte streams.
042 *
043 * @Immutable
044 */
045// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
046public class TarUtils {
047
    /** Mask that reduces a sign-extended {@code byte} to its unsigned value (0 to 255). */
    private static final int BYTE_MASK = 255;

    /** Encoding obtained from {@link ZipEncodingHelper} for the default charset of the running JVM. */
    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
051
052    /**
053     * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
054     */
055    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
056
057        @Override
058        public boolean canEncode(final String name) {
059            return true;
060        }
061
062        @Override
063        public String decode(final byte[] buffer) {
064            final int length = buffer.length;
065            final StringBuilder result = new StringBuilder(length);
066            for (final byte b : buffer) {
067                if (b == 0) { // Trailing null
068                    break;
069                }
070                result.append((char) (b & 0xFF)); // Allow for sign-extension
071            }
072            return result.toString();
073        }
074
075        @Override
076        public ByteBuffer encode(final String name) {
077            final int length = name.length();
078            final byte[] buf = new byte[length];
079            // copy until end of input or output is reached.
080            for (int i = 0; i < length; ++i) {
081                buf[i] = (byte) name.charAt(i);
082            }
083            return ByteBuffer.wrap(buf);
084        }
085    };
086
087    /**
088     * Computes the checksum of a tar entry header.
089     *
090     * @param buf The tar entry's header buffer.
091     * @return The computed checksum.
092     */
093    public static long computeCheckSum(final byte[] buf) {
094        long sum = 0;
095        for (final byte element : buf) {
096            sum += BYTE_MASK & element;
097        }
098        return sum;
099    }
100
101    // Helper method to generate the exception message
102    private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
103        // default charset is good enough for an exception message,
104        //
105        // the alternative was to modify parseOctal and
106        // parseOctalOrBinary to receive the ZipEncoding of the
107        // archive (deprecating the existing public methods, of
108        // course) and dealing with the fact that ZipEncoding#decode
109        // can throw an IOException which parseOctal* doesn't declare
110        String string = new String(buffer, offset, length, Charset.defaultCharset());
111
112        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
113        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
114    }
115
116    private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
117        final BigInteger val = BigInteger.valueOf(value);
118        final byte[] b = val.toByteArray();
119        final int len = b.length;
120        if (len > length - 1) {
121            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
122        }
123        final int off = offset + length - len;
124        System.arraycopy(b, 0, buf, off, len);
125        final byte fill = (byte) (negative ? 0xff : 0);
126        for (int i = offset + 1; i < off; i++) {
127            buf[i] = fill;
128        }
129    }
130
131    /**
132     * Writes an octal value into a buffer.
133     *
134     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
135     * space.
136     *
137     * @param value  The value to convert
138     * @param buf    The destination buffer
139     * @param offset The starting offset into the buffer.
140     * @param length The size of the buffer.
141     * @return The updated value of offset, i.e. offset+length
142     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
143     */
144    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
145        int idx = length - 2; // for NUL and space
146        formatUnsignedOctalString(value, buf, offset, idx);
147        buf[offset + idx++] = 0; // Trailing null
148        buf[offset + idx] = (byte) ' '; // Trailing space
149        return offset + length;
150    }
151
152    private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
153        final int bits = (length - 1) * 8;
154        final long max = 1L << bits;
155        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
156        if (val < 0 || val >= max) {
157            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
158        }
159        if (negative) {
160            val ^= max - 1;
161            val++;
162            val |= 0xffL << bits;
163        }
164        for (int i = offset + length - 1; i >= offset; i--) {
165            buf[i] = (byte) val;
166            val >>= 8;
167        }
168    }
169
170    /**
171     * Writes an octal long integer into a buffer.
172     *
173     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
174     *
175     * @param value  The value to write as octal
176     * @param buf    The destinationbuffer.
177     * @param offset The starting offset into the buffer.
178     * @param length The length of the buffer
179     * @return The updated offset
180     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
181     */
182    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
183        final int idx = length - 1; // For space
184        formatUnsignedOctalString(value, buf, offset, idx);
185        buf[offset + idx] = (byte) ' '; // Trailing space
186        return offset + length;
187    }
188
189    /**
190     * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
191     *
192     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
193     *
194     * @param value  The value to write into the buffer.
195     * @param buf    The destination buffer.
196     * @param offset The starting offset into the buffer.
197     * @param length The length of the buffer.
198     * @return The updated offset.
199     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
200     * @since 1.4
201     */
202    public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
203        // Check whether we are dealing with UID/GID or SIZE field
204        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
205        final boolean negative = value < 0;
206        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
207            return formatLongOctalBytes(value, buf, offset, length);
208        }
209        if (length < 9) {
210            formatLongBinary(value, buf, offset, length, negative);
211        } else {
212            formatBigIntegerBinary(value, buf, offset, length, negative);
213        }
214        buf[offset] = (byte) (negative ? 0xff : 0x80);
215        return offset + length;
216    }
217
218    /**
219     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
220     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
221     *
222     * @param name   The header name from which to copy the characters.
223     * @param buf    The buffer where the name is to be stored.
224     * @param offset The starting offset into the buffer
225     * @param length The maximum number of header bytes to copy.
226     * @return The updated offset, i.e. offset + length
227     */
228    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
229        try {
230            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
231        } catch (final IOException ex) { // NOSONAR
232            try {
233                return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
234            } catch (final IOException ex2) {
235                // impossible
236                throw new UncheckedIOException(ex2); // NOSONAR
237            }
238        }
239    }
240
241    /**
242     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
243     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
244     *
245     * @param name     The header name from which to copy the characters.
246     * @param buf      The buffer where the name is to be stored.
247     * @param offset   The starting offset into the buffer
248     * @param length   The maximum number of header bytes to copy.
249     * @param encoding name of the encoding to use for file names
250     * @since 1.4
251     * @return The updated offset, i.e. offset + length
252     * @throws IOException on error
253     */
254    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
255        int len = name.length();
256        ByteBuffer b = encoding.encode(name);
257        while (b.limit() > length && len > 0) {
258            b = encoding.encode(name.substring(0, --len));
259        }
260        final int limit = b.limit() - b.position();
261        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
262        // Pad any remaining output bytes with NUL
263        for (int i = limit; i < length; ++i) {
264            buf[offset + i] = 0;
265        }
266        return offset + length;
267    }
268
269    /**
270     * Writes an octal integer into a buffer.
271     *
272     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
273     *
274     * @param value  The value to write
275     * @param buf    The buffer to receive the output
276     * @param offset The starting offset into the buffer
277     * @param length The size of the output buffer
278     * @return The updated offset, i.e. offset+length
279     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
280     */
281    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
282        int idx = length - 2; // For space and trailing null
283        formatUnsignedOctalString(value, buf, offset, idx);
284        buf[offset + idx++] = (byte) ' '; // Trailing space
285        buf[offset + idx] = 0; // Trailing null
286        return offset + length;
287    }
288
289    /**
290     * Fills a buffer with unsigned octal number, padded with leading zeroes.
291     *
292     * @param value  number to convert to octal - treated as unsigned
293     * @param buffer destination buffer
294     * @param offset starting offset in buffer
295     * @param length length of buffer to fill
296     * @throws IllegalArgumentException if the value will not fit in the buffer
297     */
298    public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
299        int remaining = length;
300        remaining--;
301        if (value == 0) {
302            buffer[offset + remaining--] = (byte) '0';
303        } else {
304            long val = value;
305            for (; remaining >= 0 && val != 0; --remaining) {
306                // CheckStyle:MagicNumber OFF
307                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
308                val = val >>> 3;
309                // CheckStyle:MagicNumber ON
310            }
311            if (val != 0) {
312                throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
313            }
314        }
315
316        for (; remaining >= 0; --remaining) { // leading zeros
317            buffer[offset + remaining] = (byte) '0';
318        }
319    }
320
321    private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
322        final byte[] remainder = new byte[length - 1];
323        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
324        BigInteger val = new BigInteger(remainder);
325        if (negative) {
326            // 2's complement
327            val = val.add(BigInteger.valueOf(-1)).not();
328        }
329        if (val.bitLength() > 63) {
330            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
331        }
332        return negative ? -val.longValue() : val.longValue();
333    }
334
335    private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
336        if (length >= 9) {
337            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
338        }
339        long val = 0;
340        for (int i = 1; i < length; i++) {
341            val = (val << 8) + (buffer[offset + i] & 0xff);
342        }
343        if (negative) {
344            // 2's complement
345            val--;
346            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
347        }
348        return negative ? -val : val;
349    }
350
351    /**
352     * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
353     *
354     * @param buffer The buffer from which to parse.
355     * @param offset The offset into the buffer from which to parse.
356     * @return The boolean value of the bytes.
357     * @throws IllegalArgumentException if an invalid byte is detected.
358     */
359    public static boolean parseBoolean(final byte[] buffer, final int offset) {
360        return buffer[offset] == 1;
361    }
362
363    /**
364     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
365     * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
366     *
367     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
368     * @return unmodifiable list of sparse headers parsed from sparse map
369     * @throws IOException Corrupted TAR archive.
370     * @since 1.21
371     */
372    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
373        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
374        final String[] sparseHeaderStrings = sparseMap.split(",");
375        if (sparseHeaderStrings.length % 2 == 1) {
376            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
377        }
378        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
379            final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
380            if (sparseOffset < 0) {
381                throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
382            }
383            final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
384            if (sparseNumbytes < 0) {
385                throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
386            }
387            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
388        }
389        return Collections.unmodifiableList(sparseHeaders);
390    }
391
392    /**
393     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
394     *
395     * @param buffer The buffer from which to parse.
396     * @param offset The offset into the buffer from which to parse.
397     * @param length The maximum number of bytes to parse.
398     * @return The entry name.
399     */
400    public static String parseName(final byte[] buffer, final int offset, final int length) {
401        try {
402            return parseName(buffer, offset, length, DEFAULT_ENCODING);
403        } catch (final IOException ex) { // NOSONAR
404            try {
405                return parseName(buffer, offset, length, FALLBACK_ENCODING);
406            } catch (final IOException ex2) {
407                // impossible
408                throw new UncheckedIOException(ex2); // NOSONAR
409            }
410        }
411    }
412
413    /**
414     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
415     *
416     * @param buffer   The buffer from which to parse.
417     * @param offset   The offset into the buffer from which to parse.
418     * @param length   The maximum number of bytes to parse.
419     * @param encoding name of the encoding to use for file names
420     * @since 1.4
421     * @return The entry name.
422     * @throws IOException on error
423     */
424    public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
425        int len = 0;
426        for (int i = offset; len < length && buffer[i] != 0; i++) {
427            len++;
428        }
429        if (len > 0) {
430            final byte[] b = new byte[len];
431            System.arraycopy(buffer, offset, b, 0, len);
432            return encoding.decode(b);
433        }
434        return "";
435    }
436
437    /**
438     * Parses an octal string from a buffer.
439     *
440     * <p>
441     * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
442     * </p>
443     *
444     * <p>
445     * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
446     * </p>
447     *
448     * <p>
449     * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
450     * </p>
451     *
452     * @param buffer The buffer from which to parse.
453     * @param offset The offset into the buffer from which to parse.
454     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
455     * @return The long value of the octal string.
456     * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
457     */
458    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
459        long result = 0;
460        int end = offset + length;
461        int start = offset;
462        if (length < 2) {
463            throw new IllegalArgumentException("Length " + length + " must be at least 2");
464        }
465        if (buffer[start] == 0) {
466            return 0L;
467        }
468        // Skip leading spaces
469        while (start < end) {
470            if (buffer[start] != ' ') {
471                break;
472            }
473            start++;
474        }
475        // Trim all trailing NULs and spaces.
476        // The ustar and POSIX tar specs require a trailing NUL or
477        // space but some implementations use the extra digit for big
478        // sizes/uids/gids ...
479        byte trailer = buffer[end - 1];
480        while (start < end && (trailer == 0 || trailer == ' ')) {
481            end--;
482            trailer = buffer[end - 1];
483        }
484        for (; start < end; start++) {
485            final byte currentByte = buffer[start];
486            // CheckStyle:MagicNumber OFF
487            if (currentByte < '0' || currentByte > '7') {
488                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
489            }
490            result = (result << 3) + (currentByte - '0'); // convert from ASCII
491            // CheckStyle:MagicNumber ON
492        }
493        return result;
494    }
495
496    /**
497     * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
498     * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
499     *
500     * @param buffer The buffer from which to parse.
501     * @param offset The offset into the buffer from which to parse.
502     * @param length The maximum number of bytes to parse.
503     * @return The long value of the octal or binary string.
504     * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
505     *                                  exceed the size of a signed long 64-bit integer.
506     * @since 1.4
507     */
508    public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
509        if ((buffer[offset] & 0x80) == 0) {
510            return parseOctal(buffer, offset, length);
511        }
512        final boolean negative = buffer[offset] == (byte) 0xff;
513        if (length < 9) {
514            return parseBinaryLong(buffer, offset, length, negative);
515        }
516        return parseBinaryBigInteger(buffer, offset, length, negative);
517    }
518
519    /**
520     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
521     *
522     * <p>
523     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
524     * </p>
525     * <p>
526     * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
527     * {@link #parseFromPAX01SparseHeaders} directly instead.
528     * </p>
529     *
530     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
531     * @return sparse headers parsed from sparse map
532     * @deprecated use #parseFromPAX01SparseHeaders instead
533     */
534    @Deprecated
535    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
536        try {
537            return parseFromPAX01SparseHeaders(sparseMap);
538        } catch (final IOException ex) {
539            throw new UncheckedIOException(ex.getMessage(), ex);
540        }
541    }
542
543    /**
544     * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
545     * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
546     * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
547     *
548     * @param inputStream parsing source.
549     * @param recordSize  The size the TAR header
550     * @return sparse headers
551     * @throws IOException if an I/O error occurs.
552     */
553    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
554        // for 1.X PAX Headers
555        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
556        long bytesRead = 0;
557        long[] readResult = readLineOfNumberForPax1X(inputStream);
558        long sparseHeadersCount = readResult[0];
559        if (sparseHeadersCount < 0) {
560            // overflow while reading number?
561            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
562        }
563        bytesRead += readResult[1];
564        while (sparseHeadersCount-- > 0) {
565            readResult = readLineOfNumberForPax1X(inputStream);
566            final long sparseOffset = readResult[0];
567            if (sparseOffset < 0) {
568                throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
569            }
570            bytesRead += readResult[1];
571
572            readResult = readLineOfNumberForPax1X(inputStream);
573            final long sparseNumbytes = readResult[0];
574            if (sparseNumbytes < 0) {
575                throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
576            }
577            bytesRead += readResult[1];
578            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
579        }
580        // skip the rest of this record data
581        final long bytesToSkip = recordSize - bytesRead % recordSize;
582        org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip);
583        return sparseHeaders;
584    }
585
    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes) may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     * </p>
     * <p>
     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </p>
     * <p>
     * This method delegates to the four-argument overload, passing -1 as the header size.
     * </p>
     *
     * @param inputStream      input stream to read keys and values
     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
     * @param globalPaxHeaders global PAX headers of the tar archive
     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
     * @throws IOException if an I/O error occurs.
     * @deprecated use the four-arg version instead
     */
    @Deprecated
    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
            final Map<String, String> globalPaxHeaders) throws IOException {
        // A negative header size (-1) is passed because this overload has no size to supply.
        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
    }
616
617    /**
618     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
619     *
620     * <pre>
621     * GNU.sparse.size=size
622     * GNU.sparse.numblocks=numblocks
623     * repeat numblocks times
624     *   GNU.sparse.offset=offset
625     *   GNU.sparse.numbytes=numbytes
626     * end repeat
627     * </pre>
628     * <p>
629     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
630     * </p>
631     * <p>
632     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
633     * </p>
634     *
635     * @param inputStream      input stream to read keys and values
636     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
637     * @param globalPaxHeaders global PAX headers of the tar archive
638     * @param headerSize       total size of the PAX header, will be ignored if negative
639     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
640     * @throws IOException if an I/O error occurs.
641     * @since 1.21
642     */
643    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
644            final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
645        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
646        Long offset = null;
647        // Format is "length keyword=value\n";
648        int totalRead = 0;
649        while (true) { // get length
650            int ch;
651            int len = 0;
652            int read = 0;
653            while ((ch = inputStream.read()) != -1) {
654                read++;
655                totalRead++;
656                if (ch == '\n') { // blank line in header
657                    break;
658                }
659                if (ch == ' ') { // End of length string
660                    // Get keyword
661                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
662                    while ((ch = inputStream.read()) != -1) {
663                        read++;
664                        totalRead++;
665                        if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
666                            break;
667                        }
668                        if (ch == '=') { // end of keyword
669                            final String keyword = coll.toString(StandardCharsets.UTF_8);
670                            // Get rest of entry
671                            final int restLen = len - read;
672                            if (restLen <= 1) { // only NL
673                                headers.remove(keyword);
674                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
675                                throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
676                            } else {
677                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
678                                final int got = rest.length;
679                                if (got != restLen) {
680                                    throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
681                                }
682                                totalRead += restLen;
683                                // Drop trailing NL
684                                if (rest[restLen - 1] != '\n') {
685                                    throw new IOException("Failed to read Paxheader." + "Value should end with a newline");
686                                }
687                                final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
688                                headers.put(keyword, value);
689
690                                // for 0.0 PAX Headers
691                                if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
692                                    if (offset != null) {
693                                        // previous GNU.sparse.offset header but no numBytes
694                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
695                                    }
696                                    try {
697                                        offset = Long.valueOf(value);
698                                    } catch (final NumberFormatException ex) {
699                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
700                                    }
701                                    if (offset < 0) {
702                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
703                                    }
704                                }
705
706                                // for 0.0 PAX Headers
707                                if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
708                                    if (offset == null) {
709                                        throw new IOException(
710                                                "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
711                                    }
712                                    final long numbytes = ParsingUtils.parseLongValue(value);
713                                    if (numbytes < 0) {
714                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
715                                    }
716                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
717                                    offset = null;
718                                }
719                            }
720                            break;
721                        }
722                        coll.write((byte) ch);
723                    }
724                    break; // Processed single header
725                }
726                // COMPRESS-530 : throw if we encounter a non-number while reading length
727                if (ch < '0' || ch > '9') {
728                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
729                }
730                len *= 10;
731                len += ch - '0';
732            }
733            if (ch == -1) { // EOF
734                break;
735            }
736        }
737        if (offset != null) {
738            // offset but no numBytes
739            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
740        }
741        return headers;
742    }
743
744    /**
745     * Parses the content of a PAX 1.0 sparse block.
746     *
747     * @since 1.20
748     * @param buffer The buffer from which to parse.
749     * @param offset The offset into the buffer from which to parse.
750     * @return a parsed sparse struct
751     */
752    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
753        final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
754        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
755        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
756    }
757
758    /**
759     * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
760     * delimited by newlines.
761     *
762     * @param inputStream the input stream of the tar file
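     * @return a two-element array: the decimal number that precedes the '\n', followed by the number of bytes read from the input stream (including the
     *         terminating '\n')
     * @throws IOException if the stream ends before a '\n' is read or a character other than a decimal digit is encountered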
765     */
766    private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
767        int number;
768        long result = 0;
769        long bytesRead = 0;
770        while ((number = inputStream.read()) != '\n') {
771            bytesRead += 1;
772            if (number == -1) {
773                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
774            }
775            if (number < '0' || number > '9') {
776                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
777            }
778            result = result * 10 + (number - '0');
779        }
780        bytesRead += 1;
781        return new long[] { result, bytesRead };
782    }
783
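    /**
     * Reads {@code entries} consecutive sparse structs from the buffer, starting at {@code offset}.
     *
     * @param buffer  the buffer from which to parse
     * @param offset  the offset into the buffer of the first sparse struct
     * @param entries the number of sparse structs to read
     * @return an unmodifiable list of the parsed sparse structs
     * @throws IOException if a sparse struct cannot be parsed or has a negative offset or numbytes
     * @since 1.21
     */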
787    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
788        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
789        for (int i = 0; i < entries; i++) {
790            try {
791                final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
792                        offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
793                if (sparseHeader.getOffset() < 0) {
794                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
795                }
796                if (sparseHeader.getNumbytes() < 0) {
797                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
798                }
799                sparseHeaders.add(sparseHeader);
800            } catch (final IllegalArgumentException ex) {
801                // thrown internally by parseOctalOrBinary
802                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
803            }
804        }
805        return Collections.unmodifiableList(sparseHeaders);
806    }
807
808    /**
809     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
810     * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
811     * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
812     * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
813     * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
814     * <p>
815     * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
816     * well evolve over time as more special cases are encountered.
817     * </p>
818     *
819     * @param header tar header
820     * @return whether the checksum is reasonably good
821     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
822     * @since 1.5
823     */
824    public static boolean verifyCheckSum(final byte[] header) {
825        final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
826        long unsignedSum = 0;
827        long signedSum = 0;
828        for (int i = 0; i < header.length; i++) {
829            byte b = header[i];
830            if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
831                b = ' ';
832            }
833            unsignedSum += 0xff & b;
834            signedSum += b;
835        }
836        return storedSum == unsignedSum || storedSum == signedSum;
837    }
838
839    /** Prevents instantiation. */
    private TarUtils() {
        // Intentionally empty: the constructor is private so that code outside this class cannot create instances.
    }
842
843}