001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.UncheckedIOException;
024import java.math.BigInteger;
025import java.nio.ByteBuffer;
026import java.nio.charset.Charset;
027import java.nio.charset.StandardCharsets;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collections;
031import java.util.HashMap;
032import java.util.List;
033import java.util.Map;
034
035import org.apache.commons.compress.archivers.zip.ZipEncoding;
036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
037import org.apache.commons.compress.utils.IOUtils;
038import org.apache.commons.compress.utils.ParsingUtils;
039import org.apache.commons.io.output.ByteArrayOutputStream;
040
041/**
042 * This class provides static utility methods to work with byte streams.
043 *
044 * @Immutable
045 */
046// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
047public class TarUtils {
048
049    private static final BigInteger NEG_1_BIG_INT = BigInteger.valueOf(-1);
050
051    private static final int BYTE_MASK = 255;
052
053    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
054
055    /**
056     * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
057     */
058    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
059
060        @Override
061        public boolean canEncode(final String name) {
062            return true;
063        }
064
065        @Override
066        public String decode(final byte[] buffer) {
067            final int length = buffer.length;
068            final StringBuilder result = new StringBuilder(length);
069            for (final byte b : buffer) {
070                if (b == 0) { // Trailing null
071                    break;
072                }
073                result.append((char) (b & 0xFF)); // Allow for sign-extension
074            }
075            return result.toString();
076        }
077
078        @Override
079        public ByteBuffer encode(final String name) {
080            return ByteBuffer.wrap(name.getBytes(StandardCharsets.US_ASCII));
081        }
082    };
083
084    /**
085     * Computes the checksum of a tar entry header.
086     *
087     * @param buf The tar entry's header buffer.
088     * @return The computed checksum.
089     */
090    public static long computeCheckSum(final byte[] buf) {
091        long sum = 0;
092        for (final byte element : buf) {
093            sum += BYTE_MASK & element;
094        }
095        return sum;
096    }
097
098    /*
099     * Generates an exception message.
100     */
101    private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
102        // default charset is good enough for an exception message,
103        //
104        // the alternative was to modify parseOctal and
105        // parseOctalOrBinary to receive the ZipEncoding of the
106        // archive (deprecating the existing public methods, of
107        // course) and dealing with the fact that ZipEncoding#decode
108        // can throw an IOException which parseOctal* doesn't declare
109        String string = new String(buffer, offset, length, Charset.defaultCharset());
110        string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
111        return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
112    }
113
114    private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
115        final BigInteger val = BigInteger.valueOf(value);
116        final byte[] b = val.toByteArray();
117        final int len = b.length;
118        if (len > length - 1) {
119            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
120        }
121        final int off = offset + length - len;
122        System.arraycopy(b, 0, buf, off, len);
123        Arrays.fill(buf, offset + 1, off, (byte) (negative ? 0xff : 0));
124    }
125
126    /**
127     * Writes an octal value into a buffer.
128     *
129     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
130     * space.
131     *
132     * @param value  The value to convert
133     * @param buf    The destination buffer
134     * @param offset The starting offset into the buffer.
135     * @param length The size of the buffer.
136     * @return The updated value of offset, i.e. offset+length
137     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
138     */
139    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
140        int idx = length - 2; // for NUL and space
141        formatUnsignedOctalString(value, buf, offset, idx);
142        buf[offset + idx++] = 0; // Trailing null
143        buf[offset + idx] = (byte) ' '; // Trailing space
144        return offset + length;
145    }
146
147    private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
148        final int bits = (length - 1) * 8;
149        final long max = 1L << bits;
150        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
151        if (val < 0 || val >= max) {
152            throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
153        }
154        if (negative) {
155            val ^= max - 1;
156            val++;
157            val |= 0xffL << bits;
158        }
159        for (int i = offset + length - 1; i >= offset; i--) {
160            buf[i] = (byte) val;
161            val >>= 8;
162        }
163    }
164
165    /**
166     * Writes an octal long integer into a buffer.
167     *
168     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
169     *
170     * @param value  The value to write as octal
171     * @param buf    The destinationbuffer.
172     * @param offset The starting offset into the buffer.
173     * @param length The length of the buffer
174     * @return The updated offset
175     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
176     */
177    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
178        final int idx = length - 1; // For space
179        formatUnsignedOctalString(value, buf, offset, idx);
180        buf[offset + idx] = (byte) ' '; // Trailing space
181        return offset + length;
182    }
183
184    /**
185     * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
186     *
187     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
188     *
189     * @param value  The value to write into the buffer.
190     * @param buf    The destination buffer.
191     * @param offset The starting offset into the buffer.
192     * @param length The length of the buffer.
193     * @return The updated offset.
194     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
195     * @since 1.4
196     */
197    public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
198        // Check whether we are dealing with UID/GID or SIZE field
199        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
200        final boolean negative = value < 0;
201        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
202            return formatLongOctalBytes(value, buf, offset, length);
203        }
204        if (length < 9) {
205            formatLongBinary(value, buf, offset, length, negative);
206        } else {
207            formatBigIntegerBinary(value, buf, offset, length, negative);
208        }
209        buf[offset] = (byte) (negative ? 0xff : 0x80);
210        return offset + length;
211    }
212
213    /**
214     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
215     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
216     *
217     * @param name   The header name from which to copy the characters.
218     * @param buf    The buffer where the name is to be stored.
219     * @param offset The starting offset into the buffer
220     * @param length The maximum number of header bytes to copy.
221     * @return The updated offset, i.e. offset + length
222     */
223    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
224        try {
225            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
226        } catch (final IOException ex) { // NOSONAR
227            try {
228                return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
229            } catch (final IOException ex2) {
230                // impossible
231                throw new UncheckedIOException(ex2); // NOSONAR
232            }
233        }
234    }
235
236    /**
237     * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
238     * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
239     *
240     * @param name     The header name from which to copy the characters.
241     * @param buf      The buffer where the name is to be stored.
242     * @param offset   The starting offset into the buffer
243     * @param length   The maximum number of header bytes to copy.
244     * @param encoding name of the encoding to use for file names
245     * @return The updated offset, i.e. offset + length
246     * @throws IOException on error
247     * @since 1.4
248     */
249    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
250        int len = name.length();
251        ByteBuffer b = encoding.encode(name);
252        while (b.limit() > length && len > 0) {
253            b = encoding.encode(name.substring(0, --len));
254        }
255        final int limit = b.limit() - b.position();
256        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
257        // Pad any remaining output bytes with NUL
258        Arrays.fill(buf, offset + limit, offset + length, (byte) 0);
259        return offset + length;
260    }
261
262    /**
263     * Writes an octal integer into a buffer.
264     *
265     * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
266     *
267     * @param value  The value to write
268     * @param buf    The buffer to receive the output
269     * @param offset The starting offset into the buffer
270     * @param length The size of the output buffer
271     * @return The updated offset, i.e. offset+length
272     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
273     */
274    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
275        int idx = length - 2; // For space and trailing null
276        formatUnsignedOctalString(value, buf, offset, idx);
277        buf[offset + idx++] = (byte) ' '; // Trailing space
278        buf[offset + idx] = 0; // Trailing null
279        return offset + length;
280    }
281
282    /**
283     * Fills a buffer with unsigned octal number, padded with leading zeroes.
284     *
285     * @param value  number to convert to octal - treated as unsigned
286     * @param buffer destination buffer
287     * @param offset starting offset in buffer
288     * @param length length of buffer to fill
289     * @throws IllegalArgumentException if the value will not fit in the buffer
290     */
291    public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
292        int remaining = length;
293        remaining--;
294        if (value == 0) {
295            buffer[offset + remaining--] = (byte) '0';
296        } else {
297            long val = value;
298            for (; remaining >= 0 && val != 0; --remaining) {
299                // CheckStyle:MagicNumber OFF
300                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
301                val = val >>> 3;
302                // CheckStyle:MagicNumber ON
303            }
304            if (val != 0) {
305                throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
306            }
307        }
308        for (; remaining >= 0; --remaining) { // leading zeros
309            buffer[offset + remaining] = (byte) '0';
310        }
311        Arrays.fill(buffer, offset, offset + remaining + 1, (byte) '0');
312    }
313
314    private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
315        final byte[] remainder = new byte[length - 1];
316        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
317        BigInteger val = new BigInteger(remainder);
318        if (negative) {
319            // 2's complement
320            val = val.add(NEG_1_BIG_INT).not();
321        }
322        if (val.bitLength() > 63) {
323            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
324        }
325        return negative ? -val.longValue() : val.longValue();
326    }
327
328    private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
329        if (length >= 9) {
330            throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
331        }
332        long val = 0;
333        for (int i = 1; i < length; i++) {
334            val = (val << 8) + (buffer[offset + i] & 0xff);
335        }
336        if (negative) {
337            // 2's complement
338            val--;
339            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
340        }
341        return negative ? -val : val;
342    }
343
344    /**
345     * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
346     *
347     * @param buffer The buffer from which to parse.
348     * @param offset The offset into the buffer from which to parse.
349     * @return The boolean value of the bytes.
350     * @throws IllegalArgumentException if an invalid byte is detected.
351     */
352    public static boolean parseBoolean(final byte[] buffer, final int offset) {
353        return buffer[offset] == 1;
354    }
355
356    /**
357     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
358     * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
359     *
360     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
361     * @return unmodifiable list of sparse headers parsed from sparse map
362     * @throws IOException Corrupted TAR archive.
363     * @since 1.21
364     */
365    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
366        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
367        final String[] sparseHeaderStrings = sparseMap.split(",");
368        if (sparseHeaderStrings.length % 2 == 1) {
369            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
370        }
371        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
372            final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
373            if (sparseOffset < 0) {
374                throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
375            }
376            final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
377            if (sparseNumbytes < 0) {
378                throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
379            }
380            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
381        }
382        return Collections.unmodifiableList(sparseHeaders);
383    }
384
385    /**
386     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
387     *
388     * @param buffer The buffer from which to parse.
389     * @param offset The offset into the buffer from which to parse.
390     * @param length The maximum number of bytes to parse.
391     * @return The entry name.
392     */
393    public static String parseName(final byte[] buffer, final int offset, final int length) {
394        try {
395            return parseName(buffer, offset, length, DEFAULT_ENCODING);
396        } catch (final IOException ex) { // NOSONAR
397            try {
398                return parseName(buffer, offset, length, FALLBACK_ENCODING);
399            } catch (final IOException ex2) {
400                // impossible
401                throw new UncheckedIOException(ex2); // NOSONAR
402            }
403        }
404    }
405
406    /**
407     * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
408     *
409     * @param buffer   The buffer from which to parse.
410     * @param offset   The offset into the buffer from which to parse.
411     * @param length   The maximum number of bytes to parse.
412     * @param encoding name of the encoding to use for file names
413     * @return The entry name.
414     * @throws IOException on error
415     * @since 1.4
416     */
417    public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
418        int len = 0;
419        for (int i = offset; len < length && buffer[i] != 0; i++) {
420            len++;
421        }
422        if (len > 0) {
423            final byte[] b = new byte[len];
424            System.arraycopy(buffer, offset, b, 0, len);
425            return encoding.decode(b);
426        }
427        return "";
428    }
429
430    /**
431     * Parses an octal string from a buffer.
432     *
433     * <p>
434     * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
435     * </p>
436     *
437     * <p>
438     * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
439     * </p>
440     *
441     * <p>
442     * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
443     * </p>
444     *
445     * @param buffer The buffer from which to parse.
446     * @param offset The offset into the buffer from which to parse.
447     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
448     * @return The long value of the octal string.
449     * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
450     */
451    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
452        long result = 0;
453        int end = offset + length;
454        int start = offset;
455        if (length < 2) {
456            throw new IllegalArgumentException("Length " + length + " must be at least 2");
457        }
458        if (buffer[start] == 0) {
459            return 0L;
460        }
461        // Skip leading spaces
462        while (start < end) {
463            if (buffer[start] != ' ') {
464                break;
465            }
466            start++;
467        }
468        // Trim all trailing NULs and spaces.
469        // The ustar and POSIX tar specs require a trailing NUL or
470        // space but some implementations use the extra digit for big
471        // sizes/uids/gids ...
472        byte trailer = buffer[end - 1];
473        while (start < end && (trailer == 0 || trailer == ' ')) {
474            end--;
475            trailer = buffer[end - 1];
476        }
477        for (; start < end; start++) {
478            final byte currentByte = buffer[start];
479            // CheckStyle:MagicNumber OFF
480            if (currentByte < '0' || currentByte > '7') {
481                throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
482            }
483            result = (result << 3) + (currentByte - '0'); // convert from ASCII
484            // CheckStyle:MagicNumber ON
485        }
486        return result;
487    }
488
489    /**
490     * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
491     * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
492     *
493     * @param buffer The buffer from which to parse.
494     * @param offset The offset into the buffer from which to parse.
495     * @param length The maximum number of bytes to parse.
496     * @return The long value of the octal or binary string.
497     * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
498     *                                  exceed the size of a signed long 64-bit integer.
499     * @since 1.4
500     */
501    public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
502        if ((buffer[offset] & 0x80) == 0) {
503            return parseOctal(buffer, offset, length);
504        }
505        final boolean negative = buffer[offset] == (byte) 0xff;
506        if (length < 9) {
507            return parseBinaryLong(buffer, offset, length, negative);
508        }
509        return parseBinaryBigInteger(buffer, offset, length, negative);
510    }
511
512    /**
513     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
514     *
515     * <p>
516     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
517     * </p>
518     * <p>
519     * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
520     * {@link #parseFromPAX01SparseHeaders} directly instead.
521     * </p>
522     *
523     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
524     * @return sparse headers parsed from sparse map
525     * @deprecated use #parseFromPAX01SparseHeaders instead
526     */
527    @Deprecated
528    protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
529        try {
530            return parseFromPAX01SparseHeaders(sparseMap);
531        } catch (final IOException ex) {
532            throw new UncheckedIOException(ex.getMessage(), ex);
533        }
534    }
535
536    /**
537     * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
538     * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
539     * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
540     *
541     * @param inputStream parsing source.
542     * @param recordSize  The size the TAR header
543     * @return sparse headers
544     * @throws IOException if an I/O error occurs.
545     */
546    protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
547        // for 1.X PAX Headers
548        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
549        long bytesRead = 0;
550        long[] readResult = readLineOfNumberForPax1x(inputStream);
551        long sparseHeadersCount = readResult[0];
552        if (sparseHeadersCount < 0) {
553            // overflow while reading number?
554            throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
555        }
556        bytesRead += readResult[1];
557        while (sparseHeadersCount-- > 0) {
558            readResult = readLineOfNumberForPax1x(inputStream);
559            final long sparseOffset = readResult[0];
560            if (sparseOffset < 0) {
561                throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
562            }
563            bytesRead += readResult[1];
564
565            readResult = readLineOfNumberForPax1x(inputStream);
566            final long sparseNumbytes = readResult[0];
567            if (sparseNumbytes < 0) {
568                throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
569            }
570            bytesRead += readResult[1];
571            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
572        }
573        // skip the rest of this record data
574        final long bytesToSkip = recordSize - bytesRead % recordSize;
575        IOUtils.skip(inputStream, bytesToSkip);
576        return sparseHeaders;
577    }
578
579    /**
580     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
581     *
582     * <pre>
583     * GNU.sparse.size=size
584     * GNU.sparse.numblocks=numblocks
585     * repeat numblocks times
586     *   GNU.sparse.offset=offset
587     *   GNU.sparse.numbytes=numbytes
588     * end repeat
589     * </pre>
590     * <p>
591     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
592     * </p>
593     * <p>
594     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
595     * </p>
596     *
597     * @param inputStream      input stream to read keys and values
598     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
599     * @param globalPaxHeaders global PAX headers of the tar archive
600     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
601     * @throws IOException if an I/O error occurs.
602     * @deprecated use the four-arg version instead
603     */
604    @Deprecated
605    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
606            final Map<String, String> globalPaxHeaders) throws IOException {
607        return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
608    }
609
610    /**
611     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
612     *
613     * <pre>
614     * GNU.sparse.size=size
615     * GNU.sparse.numblocks=numblocks
616     * repeat numblocks times
617     *   GNU.sparse.offset=offset
618     *   GNU.sparse.numbytes=numbytes
619     * end repeat
620     * </pre>
621     * <p>
622     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
623     * </p>
624     * <p>
625     * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
626     * </p>
627     *
628     * @param inputStream      input stream to read keys and values
629     * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
630     * @param globalPaxHeaders global PAX headers of the tar archive
631     * @param headerSize       total size of the PAX header, will be ignored if negative
632     * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
633     * @throws IOException if an I/O error occurs.
634     * @since 1.21
635     */
636    protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
637            final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
638        final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
639        Long offset = null;
640        // Format is "length keyword=value\n";
641        int totalRead = 0;
642        while (true) { // get length
643            int ch;
644            int len = 0;
645            int read = 0;
646            while ((ch = inputStream.read()) != -1) {
647                read++;
648                totalRead++;
649                if (ch == '\n') { // blank line in header
650                    break;
651                }
652                if (ch == ' ') { // End of length string
653                    // Get keyword
654                    final ByteArrayOutputStream coll = new ByteArrayOutputStream();
655                    while ((ch = inputStream.read()) != -1) {
656                        read++;
657                        totalRead++;
658                        if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
659                            break;
660                        }
661                        if (ch == '=') { // end of keyword
662                            final String keyword = coll.toString(StandardCharsets.UTF_8);
663                            // Get rest of entry
664                            final int restLen = len - read;
665                            if (restLen <= 1) { // only NL
666                                headers.remove(keyword);
667                            } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
668                                throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
669                            } else {
670                                final byte[] rest = IOUtils.readRange(inputStream, restLen);
671                                final int got = rest.length;
672                                if (got != restLen) {
673                                    throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
674                                }
675                                totalRead += restLen;
676                                // Drop trailing NL
677                                if (rest[restLen - 1] != '\n') {
678                                    throw new IOException("Failed to read Paxheader.Value should end with a newline");
679                                }
680                                final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
681                                headers.put(keyword, value);
682
683                                // for 0.0 PAX Headers
684                                if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
685                                    if (offset != null) {
686                                        // previous GNU.sparse.offset header but no numBytes
687                                        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
688                                    }
689                                    try {
690                                        offset = Long.valueOf(value);
691                                    } catch (final NumberFormatException ex) {
692                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
693                                    }
694                                    if (offset < 0) {
695                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
696                                    }
697                                }
698
699                                // for 0.0 PAX Headers
700                                if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
701                                    if (offset == null) {
702                                        throw new IOException(
703                                                "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
704                                    }
705                                    final long numbytes = ParsingUtils.parseLongValue(value);
706                                    if (numbytes < 0) {
707                                        throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
708                                    }
709                                    sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
710                                    offset = null;
711                                }
712                            }
713                            break;
714                        }
715                        coll.write((byte) ch);
716                    }
717                    break; // Processed single header
718                }
719                // COMPRESS-530 : throw if we encounter a non-number while reading length
720                if (ch < '0' || ch > '9') {
721                    throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
722                }
723                len *= 10;
724                len += ch - '0';
725            }
726            if (ch == -1) { // EOF
727                break;
728            }
729        }
730        if (offset != null) {
731            // offset but no numBytes
732            sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
733        }
734        return headers;
735    }
736
737    /**
738     * Parses the content of a PAX 1.0 sparse block.
739     *
740     * @param buffer The buffer from which to parse.
741     * @param offset The offset into the buffer from which to parse.
742     * @return a parsed sparse struct
743     * @since 1.20
744     */
745    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
746        final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
747        final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
748        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
749    }
750
751    /**
752     * For 1.x PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
753     * delimited by newlines.
754     *
755     * @param inputStream the input stream of the tar file
756     * @return the decimal number delimited by '\n', and the bytes read from input stream
757     * @throws IOException if an I/O error occurs.
758     */
759    private static long[] readLineOfNumberForPax1x(final InputStream inputStream) throws IOException {
760        int number;
761        long result = 0;
762        long bytesRead = 0;
763        while ((number = inputStream.read()) != '\n') {
764            bytesRead += 1;
765            if (number == -1) {
766                throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
767            }
768            if (number < '0' || number > '9') {
769                throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
770            }
771            result = result * 10 + (number - '0');
772        }
773        bytesRead += 1;
774        return new long[] { result, bytesRead };
775    }
776
777    /**
778     * @since 1.21
779     */
780    static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
781        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
782        for (int i = 0; i < entries; i++) {
783            try {
784                final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
785                        offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
786                if (sparseHeader.getOffset() < 0) {
787                    throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
788                }
789                if (sparseHeader.getNumbytes() < 0) {
790                    throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
791                }
792                sparseHeaders.add(sparseHeader);
793            } catch (final IllegalArgumentException ex) {
794                // thrown internally by parseOctalOrBinary
795                throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
796            }
797        }
798        return Collections.unmodifiableList(sparseHeaders);
799    }
800
801    /**
802     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
803     * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
804     * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
805     * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
806     * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
807     * <p>
808     * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
809     * well evolve over time as more special cases are encountered.
810     * </p>
811     *
812     * @param header tar header
813     * @return whether the checksum is reasonably good
814     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
815     * @since 1.5
816     */
817    public static boolean verifyCheckSum(final byte[] header) {
818        final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
819        long unsignedSum = 0;
820        long signedSum = 0;
821        for (int i = 0; i < header.length; i++) {
822            byte b = header[i];
823            if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
824                b = ' ';
825            }
826            unsignedSum += 0xff & b;
827            signedSum += b;
828        }
829        return storedSum == unsignedSum || storedSum == signedSum;
830    }
831
    /** Prevents instantiation. */
    private TarUtils() {
        // Intentionally empty: the private constructor only exists so that no instance can be created from outside this class.
    }
835
836}