TarUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.archivers.tar;

  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.io.UncheckedIOException;
  23. import java.math.BigInteger;
  24. import java.nio.ByteBuffer;
  25. import java.nio.charset.Charset;
  26. import java.nio.charset.StandardCharsets;
  27. import java.util.ArrayList;
  28. import java.util.Collections;
  29. import java.util.HashMap;
  30. import java.util.List;
  31. import java.util.Map;

  32. import org.apache.commons.compress.archivers.zip.ZipEncoding;
  33. import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
  34. import org.apache.commons.compress.utils.IOUtils;
  35. import org.apache.commons.compress.utils.ParsingUtils;
  36. import org.apache.commons.io.output.ByteArrayOutputStream;

  37. /**
  38.  * This class provides static utility methods to work with byte streams.
  39.  *
  40.  * @Immutable
  41.  */
  42. // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
  43. public class TarUtils {

  44.     private static final int BYTE_MASK = 255;

  45.     static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());

  46.     /**
  47.      * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
  48.      */
  49.     static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {

  50.         @Override
  51.         public boolean canEncode(final String name) {
  52.             return true;
  53.         }

  54.         @Override
  55.         public String decode(final byte[] buffer) {
  56.             final int length = buffer.length;
  57.             final StringBuilder result = new StringBuilder(length);
  58.             for (final byte b : buffer) {
  59.                 if (b == 0) { // Trailing null
  60.                     break;
  61.                 }
  62.                 result.append((char) (b & 0xFF)); // Allow for sign-extension
  63.             }
  64.             return result.toString();
  65.         }

  66.         @Override
  67.         public ByteBuffer encode(final String name) {
  68.             final int length = name.length();
  69.             final byte[] buf = new byte[length];
  70.             // copy until end of input or output is reached.
  71.             for (int i = 0; i < length; ++i) {
  72.                 buf[i] = (byte) name.charAt(i);
  73.             }
  74.             return ByteBuffer.wrap(buf);
  75.         }
  76.     };

  77.     /**
  78.      * Computes the checksum of a tar entry header.
  79.      *
  80.      * @param buf The tar entry's header buffer.
  81.      * @return The computed checksum.
  82.      */
  83.     public static long computeCheckSum(final byte[] buf) {
  84.         long sum = 0;
  85.         for (final byte element : buf) {
  86.             sum += BYTE_MASK & element;
  87.         }
  88.         return sum;
  89.     }

  90.     // Helper method to generate the exception message
  91.     private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
  92.         // default charset is good enough for an exception message,
  93.         //
  94.         // the alternative was to modify parseOctal and
  95.         // parseOctalOrBinary to receive the ZipEncoding of the
  96.         // archive (deprecating the existing public methods, of
  97.         // course) and dealing with the fact that ZipEncoding#decode
  98.         // can throw an IOException which parseOctal* doesn't declare
  99.         String string = new String(buffer, offset, length, Charset.defaultCharset());

  100.         string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
  101.         return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
  102.     }

  103.     private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
  104.         final BigInteger val = BigInteger.valueOf(value);
  105.         final byte[] b = val.toByteArray();
  106.         final int len = b.length;
  107.         if (len > length - 1) {
  108.             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
  109.         }
  110.         final int off = offset + length - len;
  111.         System.arraycopy(b, 0, buf, off, len);
  112.         final byte fill = (byte) (negative ? 0xff : 0);
  113.         for (int i = offset + 1; i < off; i++) {
  114.             buf[i] = fill;
  115.         }
  116.     }

  117.     /**
  118.      * Writes an octal value into a buffer.
  119.      *
  120.      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
  121.      * space.
  122.      *
  123.      * @param value  The value to convert
  124.      * @param buf    The destination buffer
  125.      * @param offset The starting offset into the buffer.
  126.      * @param length The size of the buffer.
  127.      * @return The updated value of offset, i.e. offset+length
  128.      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
  129.      */
  130.     public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
  131.         int idx = length - 2; // for NUL and space
  132.         formatUnsignedOctalString(value, buf, offset, idx);
  133.         buf[offset + idx++] = 0; // Trailing null
  134.         buf[offset + idx] = (byte) ' '; // Trailing space
  135.         return offset + length;
  136.     }

  137.     private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
  138.         final int bits = (length - 1) * 8;
  139.         final long max = 1L << bits;
  140.         long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
  141.         if (val < 0 || val >= max) {
  142.             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
  143.         }
  144.         if (negative) {
  145.             val ^= max - 1;
  146.             val++;
  147.             val |= 0xffL << bits;
  148.         }
  149.         for (int i = offset + length - 1; i >= offset; i--) {
  150.             buf[i] = (byte) val;
  151.             val >>= 8;
  152.         }
  153.     }

  154.     /**
  155.      * Writes an octal long integer into a buffer.
  156.      *
  157.      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
  158.      *
  159.      * @param value  The value to write as octal
  160.      * @param buf    The destinationbuffer.
  161.      * @param offset The starting offset into the buffer.
  162.      * @param length The length of the buffer
  163.      * @return The updated offset
  164.      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
  165.      */
  166.     public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
  167.         final int idx = length - 1; // For space
  168.         formatUnsignedOctalString(value, buf, offset, idx);
  169.         buf[offset + idx] = (byte) ' '; // Trailing space
  170.         return offset + length;
  171.     }

  172.     /**
  173.      * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
  174.      *
  175.      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
  176.      *
  177.      * @param value  The value to write into the buffer.
  178.      * @param buf    The destination buffer.
  179.      * @param offset The starting offset into the buffer.
  180.      * @param length The length of the buffer.
  181.      * @return The updated offset.
  182.      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
  183.      * @since 1.4
  184.      */
  185.     public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
  186.         // Check whether we are dealing with UID/GID or SIZE field
  187.         final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
  188.         final boolean negative = value < 0;
  189.         if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
  190.             return formatLongOctalBytes(value, buf, offset, length);
  191.         }
  192.         if (length < 9) {
  193.             formatLongBinary(value, buf, offset, length, negative);
  194.         } else {
  195.             formatBigIntegerBinary(value, buf, offset, length, negative);
  196.         }
  197.         buf[offset] = (byte) (negative ? 0xff : 0x80);
  198.         return offset + length;
  199.     }

  200.     /**
  201.      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
  202.      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
  203.      *
  204.      * @param name   The header name from which to copy the characters.
  205.      * @param buf    The buffer where the name is to be stored.
  206.      * @param offset The starting offset into the buffer
  207.      * @param length The maximum number of header bytes to copy.
  208.      * @return The updated offset, i.e. offset + length
  209.      */
  210.     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
  211.         try {
  212.             return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
  213.         } catch (final IOException ex) { // NOSONAR
  214.             try {
  215.                 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
  216.             } catch (final IOException ex2) {
  217.                 // impossible
  218.                 throw new UncheckedIOException(ex2); // NOSONAR
  219.             }
  220.         }
  221.     }

  222.     /**
  223.      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
  224.      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
  225.      *
  226.      * @param name     The header name from which to copy the characters.
  227.      * @param buf      The buffer where the name is to be stored.
  228.      * @param offset   The starting offset into the buffer
  229.      * @param length   The maximum number of header bytes to copy.
  230.      * @param encoding name of the encoding to use for file names
  231.      * @since 1.4
  232.      * @return The updated offset, i.e. offset + length
  233.      * @throws IOException on error
  234.      */
  235.     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
  236.         int len = name.length();
  237.         ByteBuffer b = encoding.encode(name);
  238.         while (b.limit() > length && len > 0) {
  239.             b = encoding.encode(name.substring(0, --len));
  240.         }
  241.         final int limit = b.limit() - b.position();
  242.         System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
  243.         // Pad any remaining output bytes with NUL
  244.         for (int i = limit; i < length; ++i) {
  245.             buf[offset + i] = 0;
  246.         }
  247.         return offset + length;
  248.     }

  249.     /**
  250.      * Writes an octal integer into a buffer.
  251.      *
  252.      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
  253.      *
  254.      * @param value  The value to write
  255.      * @param buf    The buffer to receive the output
  256.      * @param offset The starting offset into the buffer
  257.      * @param length The size of the output buffer
  258.      * @return The updated offset, i.e. offset+length
  259.      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
  260.      */
  261.     public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
  262.         int idx = length - 2; // For space and trailing null
  263.         formatUnsignedOctalString(value, buf, offset, idx);
  264.         buf[offset + idx++] = (byte) ' '; // Trailing space
  265.         buf[offset + idx] = 0; // Trailing null
  266.         return offset + length;
  267.     }

  268.     /**
  269.      * Fills a buffer with unsigned octal number, padded with leading zeroes.
  270.      *
  271.      * @param value  number to convert to octal - treated as unsigned
  272.      * @param buffer destination buffer
  273.      * @param offset starting offset in buffer
  274.      * @param length length of buffer to fill
  275.      * @throws IllegalArgumentException if the value will not fit in the buffer
  276.      */
  277.     public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
  278.         int remaining = length;
  279.         remaining--;
  280.         if (value == 0) {
  281.             buffer[offset + remaining--] = (byte) '0';
  282.         } else {
  283.             long val = value;
  284.             for (; remaining >= 0 && val != 0; --remaining) {
  285.                 // CheckStyle:MagicNumber OFF
  286.                 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
  287.                 val = val >>> 3;
  288.                 // CheckStyle:MagicNumber ON
  289.             }
  290.             if (val != 0) {
  291.                 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
  292.             }
  293.         }

  294.         for (; remaining >= 0; --remaining) { // leading zeros
  295.             buffer[offset + remaining] = (byte) '0';
  296.         }
  297.     }

  298.     private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
  299.         final byte[] remainder = new byte[length - 1];
  300.         System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
  301.         BigInteger val = new BigInteger(remainder);
  302.         if (negative) {
  303.             // 2's complement
  304.             val = val.add(BigInteger.valueOf(-1)).not();
  305.         }
  306.         if (val.bitLength() > 63) {
  307.             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
  308.         }
  309.         return negative ? -val.longValue() : val.longValue();
  310.     }

  311.     private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
  312.         if (length >= 9) {
  313.             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
  314.         }
  315.         long val = 0;
  316.         for (int i = 1; i < length; i++) {
  317.             val = (val << 8) + (buffer[offset + i] & 0xff);
  318.         }
  319.         if (negative) {
  320.             // 2's complement
  321.             val--;
  322.             val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
  323.         }
  324.         return negative ? -val : val;
  325.     }

  326.     /**
  327.      * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
  328.      *
  329.      * @param buffer The buffer from which to parse.
  330.      * @param offset The offset into the buffer from which to parse.
  331.      * @return The boolean value of the bytes.
  332.      * @throws IllegalArgumentException if an invalid byte is detected.
  333.      */
  334.     public static boolean parseBoolean(final byte[] buffer, final int offset) {
  335.         return buffer[offset] == 1;
  336.     }

  337.     /**
  338.      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
  339.      * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  340.      *
  341.      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  342.      * @return unmodifiable list of sparse headers parsed from sparse map
  343.      * @throws IOException Corrupted TAR archive.
  344.      * @since 1.21
  345.      */
  346.     protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
  347.         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
  348.         final String[] sparseHeaderStrings = sparseMap.split(",");
  349.         if (sparseHeaderStrings.length % 2 == 1) {
  350.             throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
  351.         }
  352.         for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
  353.             final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
  354.             if (sparseOffset < 0) {
  355.                 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
  356.             }
  357.             final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
  358.             if (sparseNumbytes < 0) {
  359.                 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
  360.             }
  361.             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
  362.         }
  363.         return Collections.unmodifiableList(sparseHeaders);
  364.     }

  365.     /**
  366.      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
  367.      *
  368.      * @param buffer The buffer from which to parse.
  369.      * @param offset The offset into the buffer from which to parse.
  370.      * @param length The maximum number of bytes to parse.
  371.      * @return The entry name.
  372.      */
  373.     public static String parseName(final byte[] buffer, final int offset, final int length) {
  374.         try {
  375.             return parseName(buffer, offset, length, DEFAULT_ENCODING);
  376.         } catch (final IOException ex) { // NOSONAR
  377.             try {
  378.                 return parseName(buffer, offset, length, FALLBACK_ENCODING);
  379.             } catch (final IOException ex2) {
  380.                 // impossible
  381.                 throw new UncheckedIOException(ex2); // NOSONAR
  382.             }
  383.         }
  384.     }

  385.     /**
  386.      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
  387.      *
  388.      * @param buffer   The buffer from which to parse.
  389.      * @param offset   The offset into the buffer from which to parse.
  390.      * @param length   The maximum number of bytes to parse.
  391.      * @param encoding name of the encoding to use for file names
  392.      * @since 1.4
  393.      * @return The entry name.
  394.      * @throws IOException on error
  395.      */
  396.     public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
  397.         int len = 0;
  398.         for (int i = offset; len < length && buffer[i] != 0; i++) {
  399.             len++;
  400.         }
  401.         if (len > 0) {
  402.             final byte[] b = new byte[len];
  403.             System.arraycopy(buffer, offset, b, 0, len);
  404.             return encoding.decode(b);
  405.         }
  406.         return "";
  407.     }

  408.     /**
  409.      * Parses an octal string from a buffer.
  410.      *
  411.      * <p>
  412.      * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
  413.      * </p>
  414.      *
  415.      * <p>
  416.      * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
  417.      * </p>
  418.      *
  419.      * <p>
  420.      * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
  421.      * </p>
  422.      *
  423.      * @param buffer The buffer from which to parse.
  424.      * @param offset The offset into the buffer from which to parse.
  425.      * @param length The maximum number of bytes to parse - must be at least 2 bytes.
  426.      * @return The long value of the octal string.
  427.      * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
  428.      */
  429.     public static long parseOctal(final byte[] buffer, final int offset, final int length) {
  430.         long result = 0;
  431.         int end = offset + length;
  432.         int start = offset;
  433.         if (length < 2) {
  434.             throw new IllegalArgumentException("Length " + length + " must be at least 2");
  435.         }
  436.         if (buffer[start] == 0) {
  437.             return 0L;
  438.         }
  439.         // Skip leading spaces
  440.         while (start < end) {
  441.             if (buffer[start] != ' ') {
  442.                 break;
  443.             }
  444.             start++;
  445.         }
  446.         // Trim all trailing NULs and spaces.
  447.         // The ustar and POSIX tar specs require a trailing NUL or
  448.         // space but some implementations use the extra digit for big
  449.         // sizes/uids/gids ...
  450.         byte trailer = buffer[end - 1];
  451.         while (start < end && (trailer == 0 || trailer == ' ')) {
  452.             end--;
  453.             trailer = buffer[end - 1];
  454.         }
  455.         for (; start < end; start++) {
  456.             final byte currentByte = buffer[start];
  457.             // CheckStyle:MagicNumber OFF
  458.             if (currentByte < '0' || currentByte > '7') {
  459.                 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
  460.             }
  461.             result = (result << 3) + (currentByte - '0'); // convert from ASCII
  462.             // CheckStyle:MagicNumber ON
  463.         }
  464.         return result;
  465.     }

  466.     /**
  467.      * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
  468.      * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
  469.      *
  470.      * @param buffer The buffer from which to parse.
  471.      * @param offset The offset into the buffer from which to parse.
  472.      * @param length The maximum number of bytes to parse.
  473.      * @return The long value of the octal or binary string.
  474.      * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
  475.      *                                  exceed the size of a signed long 64-bit integer.
  476.      * @since 1.4
  477.      */
  478.     public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
  479.         if ((buffer[offset] & 0x80) == 0) {
  480.             return parseOctal(buffer, offset, length);
  481.         }
  482.         final boolean negative = buffer[offset] == (byte) 0xff;
  483.         if (length < 9) {
  484.             return parseBinaryLong(buffer, offset, length, negative);
  485.         }
  486.         return parseBinaryBigInteger(buffer, offset, length, negative);
  487.     }

  488.     /**
  489.      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
  490.      *
  491.      * <p>
  492.      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  493.      * </p>
  494.      * <p>
  495.      * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
  496.      * {@link #parseFromPAX01SparseHeaders} directly instead.
  497.      * </p>
  498.      *
  499.      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  500.      * @return sparse headers parsed from sparse map
  501.      * @deprecated use #parseFromPAX01SparseHeaders instead
  502.      */
  503.     @Deprecated
  504.     protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
  505.         try {
  506.             return parseFromPAX01SparseHeaders(sparseMap);
  507.         } catch (final IOException ex) {
  508.             throw new UncheckedIOException(ex.getMessage(), ex);
  509.         }
  510.     }

  511.     /**
  512.      * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
  513.      * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
  514.      * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
  515.      *
  516.      * @param inputStream parsing source.
  517.      * @param recordSize  The size the TAR header
  518.      * @return sparse headers
  519.      * @throws IOException if an I/O error occurs.
  520.      */
  521.     protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
  522.         // for 1.X PAX Headers
  523.         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
  524.         long bytesRead = 0;
  525.         long[] readResult = readLineOfNumberForPax1X(inputStream);
  526.         long sparseHeadersCount = readResult[0];
  527.         if (sparseHeadersCount < 0) {
  528.             // overflow while reading number?
  529.             throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
  530.         }
  531.         bytesRead += readResult[1];
  532.         while (sparseHeadersCount-- > 0) {
  533.             readResult = readLineOfNumberForPax1X(inputStream);
  534.             final long sparseOffset = readResult[0];
  535.             if (sparseOffset < 0) {
  536.                 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
  537.             }
  538.             bytesRead += readResult[1];

  539.             readResult = readLineOfNumberForPax1X(inputStream);
  540.             final long sparseNumbytes = readResult[0];
  541.             if (sparseNumbytes < 0) {
  542.                 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
  543.             }
  544.             bytesRead += readResult[1];
  545.             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
  546.         }
  547.         // skip the rest of this record data
  548.         final long bytesToSkip = recordSize - bytesRead % recordSize;
  549.         org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip);
  550.         return sparseHeaders;
  551.     }

  552.     /**
  553.      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
  554.      *
  555.      * <pre>
  556.      * GNU.sparse.size=size
  557.      * GNU.sparse.numblocks=numblocks
  558.      * repeat numblocks times
  559.      *   GNU.sparse.offset=offset
  560.      *   GNU.sparse.numbytes=numbytes
  561.      * end repeat
  562.      * </pre>
  563.      * <p>
  564.      * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
  565.      * </p>
  566.      * <p>
  567.      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  568.      * </p>
  569.      *
  570.      * @param inputStream      input stream to read keys and values
  571.      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
  572.      * @param globalPaxHeaders global PAX headers of the tar archive
  573.      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
  574.      * @throws IOException if an I/O error occurs.
  575.      * @deprecated use the four-arg version instead
  576.      */
  577.     @Deprecated
  578.     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
  579.             final Map<String, String> globalPaxHeaders) throws IOException {
  580.         return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
  581.     }

  582.     /**
  583.      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
  584.      *
  585.      * <pre>
  586.      * GNU.sparse.size=size
  587.      * GNU.sparse.numblocks=numblocks
  588.      * repeat numblocks times
  589.      *   GNU.sparse.offset=offset
  590.      *   GNU.sparse.numbytes=numbytes
  591.      * end repeat
  592.      * </pre>
  593.      * <p>
  594.      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
  595.      * </p>
  596.      * <p>
  597.      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
  598.      * </p>
  599.      *
  600.      * @param inputStream      input stream to read keys and values
  601.      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
  602.      * @param globalPaxHeaders global PAX headers of the tar archive
  603.      * @param headerSize       total size of the PAX header, will be ignored if negative
  604.      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
  605.      * @throws IOException if an I/O error occurs.
  606.      * @since 1.21
  607.      */
  608.     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
  609.             final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
  610.         final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
  611.         Long offset = null;
  612.         // Format is "length keyword=value\n";
  613.         int totalRead = 0;
  614.         while (true) { // get length
  615.             int ch;
  616.             int len = 0;
  617.             int read = 0;
  618.             while ((ch = inputStream.read()) != -1) {
  619.                 read++;
  620.                 totalRead++;
  621.                 if (ch == '\n') { // blank line in header
  622.                     break;
  623.                 }
  624.                 if (ch == ' ') { // End of length string
  625.                     // Get keyword
  626.                     final ByteArrayOutputStream coll = new ByteArrayOutputStream();
  627.                     while ((ch = inputStream.read()) != -1) {
  628.                         read++;
  629.                         totalRead++;
  630.                         if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
  631.                             break;
  632.                         }
  633.                         if (ch == '=') { // end of keyword
  634.                             final String keyword = coll.toString(StandardCharsets.UTF_8);
  635.                             // Get rest of entry
  636.                             final int restLen = len - read;
  637.                             if (restLen <= 1) { // only NL
  638.                                 headers.remove(keyword);
  639.                             } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
  640.                                 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
  641.                             } else {
  642.                                 final byte[] rest = IOUtils.readRange(inputStream, restLen);
  643.                                 final int got = rest.length;
  644.                                 if (got != restLen) {
  645.                                     throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
  646.                                 }
  647.                                 totalRead += restLen;
  648.                                 // Drop trailing NL
  649.                                 if (rest[restLen - 1] != '\n') {
  650.                                     throw new IOException("Failed to read Paxheader." + "Value should end with a newline");
  651.                                 }
  652.                                 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
  653.                                 headers.put(keyword, value);

  654.                                 // for 0.0 PAX Headers
  655.                                 if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
  656.                                     if (offset != null) {
  657.                                         // previous GNU.sparse.offset header but no numBytes
  658.                                         sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
  659.                                     }
  660.                                     try {
  661.                                         offset = Long.valueOf(value);
  662.                                     } catch (final NumberFormatException ex) {
  663.                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
  664.                                     }
  665.                                     if (offset < 0) {
  666.                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
  667.                                     }
  668.                                 }

  669.                                 // for 0.0 PAX Headers
  670.                                 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
  671.                                     if (offset == null) {
  672.                                         throw new IOException(
  673.                                                 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
  674.                                     }
  675.                                     final long numbytes = ParsingUtils.parseLongValue(value);
  676.                                     if (numbytes < 0) {
  677.                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
  678.                                     }
  679.                                     sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
  680.                                     offset = null;
  681.                                 }
  682.                             }
  683.                             break;
  684.                         }
  685.                         coll.write((byte) ch);
  686.                     }
  687.                     break; // Processed single header
  688.                 }
  689.                 // COMPRESS-530 : throw if we encounter a non-number while reading length
  690.                 if (ch < '0' || ch > '9') {
  691.                     throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
  692.                 }
  693.                 len *= 10;
  694.                 len += ch - '0';
  695.             }
  696.             if (ch == -1) { // EOF
  697.                 break;
  698.             }
  699.         }
  700.         if (offset != null) {
  701.             // offset but no numBytes
  702.             sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
  703.         }
  704.         return headers;
  705.     }

  706.     /**
  707.      * Parses the content of a PAX 1.0 sparse block.
  708.      *
  709.      * @since 1.20
  710.      * @param buffer The buffer from which to parse.
  711.      * @param offset The offset into the buffer from which to parse.
  712.      * @return a parsed sparse struct
  713.      */
  714.     public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
  715.         final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
  716.         final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
  717.         return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
  718.     }

  719.     /**
  720.      * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
  721.      * delimited by newlines.
  722.      *
  723.      * @param inputStream the input stream of the tar file
  724.      * @return the decimal number delimited by '\n', and the bytes read from input stream
  725.      * @throws IOException
  726.      */
  727.     private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
  728.         int number;
  729.         long result = 0;
  730.         long bytesRead = 0;
  731.         while ((number = inputStream.read()) != '\n') {
  732.             bytesRead += 1;
  733.             if (number == -1) {
  734.                 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
  735.             }
  736.             if (number < '0' || number > '9') {
  737.                 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
  738.             }
  739.             result = result * 10 + (number - '0');
  740.         }
  741.         bytesRead += 1;
  742.         return new long[] { result, bytesRead };
  743.     }

  744.     /**
  745.      * @since 1.21
  746.      */
  747.     static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
  748.         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
  749.         for (int i = 0; i < entries; i++) {
  750.             try {
  751.                 final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
  752.                         offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
  753.                 if (sparseHeader.getOffset() < 0) {
  754.                     throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
  755.                 }
  756.                 if (sparseHeader.getNumbytes() < 0) {
  757.                     throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
  758.                 }
  759.                 sparseHeaders.add(sparseHeader);
  760.             } catch (final IllegalArgumentException ex) {
  761.                 // thrown internally by parseOctalOrBinary
  762.                 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
  763.             }
  764.         }
  765.         return Collections.unmodifiableList(sparseHeaders);
  766.     }

  767.     /**
  768.      * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
  769.      * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
  770.      * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
  771.      * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
  772.      * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
  773.      * <p>
  774.      * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
  775.      * well evolve over time as more special cases are encountered.
  776.      * </p>
  777.      *
  778.      * @param header tar header
  779.      * @return whether the checksum is reasonably good
  780.      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
  781.      * @since 1.5
  782.      */
  783.     public static boolean verifyCheckSum(final byte[] header) {
  784.         final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
  785.         long unsignedSum = 0;
  786.         long signedSum = 0;
  787.         for (int i = 0; i < header.length; i++) {
  788.             byte b = header[i];
  789.             if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
  790.                 b = ' ';
  791.             }
  792.             unsignedSum += 0xff & b;
  793.             signedSum += b;
  794.         }
  795.         return storedSum == unsignedSum || storedSum == signedSum;
  796.     }

    /**
     * Prevents instantiation from outside this class; the constructor is private and does nothing.
     */
    private TarUtils() {
    }

  800. }