View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.tar;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.UncheckedIOException;
24  import java.math.BigInteger;
25  import java.nio.ByteBuffer;
26  import java.nio.charset.Charset;
27  import java.nio.charset.StandardCharsets;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collections;
31  import java.util.HashMap;
32  import java.util.List;
33  import java.util.Map;
34  
35  import org.apache.commons.compress.archivers.zip.ZipEncoding;
36  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
37  import org.apache.commons.compress.utils.IOUtils;
38  import org.apache.commons.compress.utils.ParsingUtils;
39  import org.apache.commons.io.output.ByteArrayOutputStream;
40  
41  /**
42   * This class provides static utility methods to work with byte streams.
43   *
44   * @Immutable
45   */
46  // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
47  public class TarUtils {
48  
49      private static final BigInteger NEG_1_BIG_INT = BigInteger.valueOf(-1);
50  
51      private static final int BYTE_MASK = 255;
52  
53      static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
54  
55      /**
56       * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
57       */
58      static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
59  
60          @Override
61          public boolean canEncode(final String name) {
62              return true;
63          }
64  
65          @Override
66          public String decode(final byte[] buffer) {
67              final int length = buffer.length;
68              final StringBuilder result = new StringBuilder(length);
69              for (final byte b : buffer) {
70                  if (b == 0) { // Trailing null
71                      break;
72                  }
73                  result.append((char) (b & 0xFF)); // Allow for sign-extension
74              }
75              return result.toString();
76          }
77  
78          @Override
79          public ByteBuffer encode(final String name) {
80              return ByteBuffer.wrap(name.getBytes(StandardCharsets.US_ASCII));
81          }
82      };
83  
84      /**
85       * Computes the checksum of a tar entry header.
86       *
87       * @param buf The tar entry's header buffer.
88       * @return The computed checksum.
89       */
90      public static long computeCheckSum(final byte[] buf) {
91          long sum = 0;
92          for (final byte element : buf) {
93              sum += BYTE_MASK & element;
94          }
95          return sum;
96      }
97  
98      /*
99       * Generates an exception message.
100      */
101     private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
102         // default charset is good enough for an exception message,
103         //
104         // the alternative was to modify parseOctal and
105         // parseOctalOrBinary to receive the ZipEncoding of the
106         // archive (deprecating the existing public methods, of
107         // course) and dealing with the fact that ZipEncoding#decode
108         // can throw an IOException which parseOctal* doesn't declare
109         String string = new String(buffer, offset, length, Charset.defaultCharset());
110         string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
111         return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
112     }
113 
114     private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
115         final BigInteger val = BigInteger.valueOf(value);
116         final byte[] b = val.toByteArray();
117         final int len = b.length;
118         if (len > length - 1) {
119             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
120         }
121         final int off = offset + length - len;
122         System.arraycopy(b, 0, buf, off, len);
123         Arrays.fill(buf, offset + 1, off, (byte) (negative ? 0xff : 0));
124     }
125 
126     /**
127      * Writes an octal value into a buffer.
128      *
129      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
130      * space.
131      *
132      * @param value  The value to convert
133      * @param buf    The destination buffer
134      * @param offset The starting offset into the buffer.
135      * @param length The size of the buffer.
136      * @return The updated value of offset, i.e. offset+length
137      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
138      */
139     public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
140         int idx = length - 2; // for NUL and space
141         formatUnsignedOctalString(value, buf, offset, idx);
142         buf[offset + idx++] = 0; // Trailing null
143         buf[offset + idx] = (byte) ' '; // Trailing space
144         return offset + length;
145     }
146 
147     private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
148         final int bits = (length - 1) * 8;
149         final long max = 1L << bits;
150         long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
151         if (val < 0 || val >= max) {
152             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
153         }
154         if (negative) {
155             val ^= max - 1;
156             val++;
157             val |= 0xffL << bits;
158         }
159         for (int i = offset + length - 1; i >= offset; i--) {
160             buf[i] = (byte) val;
161             val >>= 8;
162         }
163     }
164 
165     /**
166      * Writes an octal long integer into a buffer.
167      *
168      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
169      *
170      * @param value  The value to write as octal
171      * @param buf    The destinationbuffer.
172      * @param offset The starting offset into the buffer.
173      * @param length The length of the buffer
174      * @return The updated offset
175      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
176      */
177     public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
178         final int idx = length - 1; // For space
179         formatUnsignedOctalString(value, buf, offset, idx);
180         buf[offset + idx] = (byte) ' '; // Trailing space
181         return offset + length;
182     }
183 
184     /**
185      * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
186      *
187      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
188      *
189      * @param value  The value to write into the buffer.
190      * @param buf    The destination buffer.
191      * @param offset The starting offset into the buffer.
192      * @param length The length of the buffer.
193      * @return The updated offset.
194      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
195      * @since 1.4
196      */
197     public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
198         // Check whether we are dealing with UID/GID or SIZE field
199         final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
200         final boolean negative = value < 0;
201         if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
202             return formatLongOctalBytes(value, buf, offset, length);
203         }
204         if (length < 9) {
205             formatLongBinary(value, buf, offset, length, negative);
206         } else {
207             formatBigIntegerBinary(value, buf, offset, length, negative);
208         }
209         buf[offset] = (byte) (negative ? 0xff : 0x80);
210         return offset + length;
211     }
212 
213     /**
214      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
215      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
216      *
217      * @param name   The header name from which to copy the characters.
218      * @param buf    The buffer where the name is to be stored.
219      * @param offset The starting offset into the buffer
220      * @param length The maximum number of header bytes to copy.
221      * @return The updated offset, i.e. offset + length
222      */
223     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
224         try {
225             return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
226         } catch (final IOException ex) { // NOSONAR
227             try {
228                 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
229             } catch (final IOException ex2) {
230                 // impossible
231                 throw new UncheckedIOException(ex2); // NOSONAR
232             }
233         }
234     }
235 
236     /**
237      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
238      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
239      *
240      * @param name     The header name from which to copy the characters.
241      * @param buf      The buffer where the name is to be stored.
242      * @param offset   The starting offset into the buffer
243      * @param length   The maximum number of header bytes to copy.
244      * @param encoding name of the encoding to use for file names
245      * @return The updated offset, i.e. offset + length
246      * @throws IOException on error
247      * @since 1.4
248      */
249     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
250         int len = name.length();
251         ByteBuffer b = encoding.encode(name);
252         while (b.limit() > length && len > 0) {
253             b = encoding.encode(name.substring(0, --len));
254         }
255         final int limit = b.limit() - b.position();
256         System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
257         // Pad any remaining output bytes with NUL
258         Arrays.fill(buf, offset + limit, offset + length, (byte) 0);
259         return offset + length;
260     }
261 
262     /**
263      * Writes an octal integer into a buffer.
264      *
265      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
266      *
267      * @param value  The value to write
268      * @param buf    The buffer to receive the output
269      * @param offset The starting offset into the buffer
270      * @param length The size of the output buffer
271      * @return The updated offset, i.e. offset+length
272      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
273      */
274     public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
275         int idx = length - 2; // For space and trailing null
276         formatUnsignedOctalString(value, buf, offset, idx);
277         buf[offset + idx++] = (byte) ' '; // Trailing space
278         buf[offset + idx] = 0; // Trailing null
279         return offset + length;
280     }
281 
282     /**
283      * Fills a buffer with unsigned octal number, padded with leading zeroes.
284      *
285      * @param value  number to convert to octal - treated as unsigned
286      * @param buffer destination buffer
287      * @param offset starting offset in buffer
288      * @param length length of buffer to fill
289      * @throws IllegalArgumentException if the value will not fit in the buffer
290      */
291     public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
292         int remaining = length;
293         remaining--;
294         if (value == 0) {
295             buffer[offset + remaining--] = (byte) '0';
296         } else {
297             long val = value;
298             for (; remaining >= 0 && val != 0; --remaining) {
299                 // CheckStyle:MagicNumber OFF
300                 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
301                 val = val >>> 3;
302                 // CheckStyle:MagicNumber ON
303             }
304             if (val != 0) {
305                 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
306             }
307         }
308         for (; remaining >= 0; --remaining) { // leading zeros
309             buffer[offset + remaining] = (byte) '0';
310         }
311         Arrays.fill(buffer, offset, offset + remaining + 1, (byte) '0');
312     }
313 
314     private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
315         final byte[] remainder = new byte[length - 1];
316         System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
317         BigInteger val = new BigInteger(remainder);
318         if (negative) {
319             // 2's complement
320             val = val.add(NEG_1_BIG_INT).not();
321         }
322         if (val.bitLength() > 63) {
323             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
324         }
325         return negative ? -val.longValue() : val.longValue();
326     }
327 
328     private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
329         if (length >= 9) {
330             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
331         }
332         long val = 0;
333         for (int i = 1; i < length; i++) {
334             val = (val << 8) + (buffer[offset + i] & 0xff);
335         }
336         if (negative) {
337             // 2's complement
338             val--;
339             val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
340         }
341         return negative ? -val : val;
342     }
343 
344     /**
345      * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
346      *
347      * @param buffer The buffer from which to parse.
348      * @param offset The offset into the buffer from which to parse.
349      * @return The boolean value of the bytes.
350      * @throws IllegalArgumentException if an invalid byte is detected.
351      */
352     public static boolean parseBoolean(final byte[] buffer, final int offset) {
353         return buffer[offset] == 1;
354     }
355 
356     /**
357      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
358      * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
359      *
360      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
361      * @return unmodifiable list of sparse headers parsed from sparse map
362      * @throws IOException Corrupted TAR archive.
363      * @since 1.21
364      */
365     protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
366         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
367         final String[] sparseHeaderStrings = sparseMap.split(",");
368         if (sparseHeaderStrings.length % 2 == 1) {
369             throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
370         }
371         for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
372             final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
373             if (sparseOffset < 0) {
374                 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
375             }
376             final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
377             if (sparseNumbytes < 0) {
378                 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
379             }
380             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
381         }
382         return Collections.unmodifiableList(sparseHeaders);
383     }
384 
385     /**
386      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
387      *
388      * @param buffer The buffer from which to parse.
389      * @param offset The offset into the buffer from which to parse.
390      * @param length The maximum number of bytes to parse.
391      * @return The entry name.
392      */
393     public static String parseName(final byte[] buffer, final int offset, final int length) {
394         try {
395             return parseName(buffer, offset, length, DEFAULT_ENCODING);
396         } catch (final IOException ex) { // NOSONAR
397             try {
398                 return parseName(buffer, offset, length, FALLBACK_ENCODING);
399             } catch (final IOException ex2) {
400                 // impossible
401                 throw new UncheckedIOException(ex2); // NOSONAR
402             }
403         }
404     }
405 
406     /**
407      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
408      *
409      * @param buffer   The buffer from which to parse.
410      * @param offset   The offset into the buffer from which to parse.
411      * @param length   The maximum number of bytes to parse.
412      * @param encoding name of the encoding to use for file names
413      * @return The entry name.
414      * @throws IOException on error
415      * @since 1.4
416      */
417     public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
418         int len = 0;
419         for (int i = offset; len < length && buffer[i] != 0; i++) {
420             len++;
421         }
422         if (len > 0) {
423             final byte[] b = new byte[len];
424             System.arraycopy(buffer, offset, b, 0, len);
425             return encoding.decode(b);
426         }
427         return "";
428     }
429 
430     /**
431      * Parses an octal string from a buffer.
432      *
433      * <p>
434      * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
435      * </p>
436      *
437      * <p>
438      * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
439      * </p>
440      *
441      * <p>
442      * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
443      * </p>
444      *
445      * @param buffer The buffer from which to parse.
446      * @param offset The offset into the buffer from which to parse.
447      * @param length The maximum number of bytes to parse - must be at least 2 bytes.
448      * @return The long value of the octal string.
449      * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
450      */
451     public static long parseOctal(final byte[] buffer, final int offset, final int length) {
452         long result = 0;
453         int end = offset + length;
454         int start = offset;
455         if (length < 2) {
456             throw new IllegalArgumentException("Length " + length + " must be at least 2");
457         }
458         if (buffer[start] == 0) {
459             return 0L;
460         }
461         // Skip leading spaces
462         while (start < end) {
463             if (buffer[start] != ' ') {
464                 break;
465             }
466             start++;
467         }
468         // Trim all trailing NULs and spaces.
469         // The ustar and POSIX tar specs require a trailing NUL or
470         // space but some implementations use the extra digit for big
471         // sizes/uids/gids ...
472         byte trailer = buffer[end - 1];
473         while (start < end && (trailer == 0 || trailer == ' ')) {
474             end--;
475             trailer = buffer[end - 1];
476         }
477         for (; start < end; start++) {
478             final byte currentByte = buffer[start];
479             // CheckStyle:MagicNumber OFF
480             if (currentByte < '0' || currentByte > '7') {
481                 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
482             }
483             result = (result << 3) + (currentByte - '0'); // convert from ASCII
484             // CheckStyle:MagicNumber ON
485         }
486         return result;
487     }
488 
489     /**
490      * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
491      * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
492      *
493      * @param buffer The buffer from which to parse.
494      * @param offset The offset into the buffer from which to parse.
495      * @param length The maximum number of bytes to parse.
496      * @return The long value of the octal or binary string.
497      * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
498      *                                  exceed the size of a signed long 64-bit integer.
499      * @since 1.4
500      */
501     public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
502         if ((buffer[offset] & 0x80) == 0) {
503             return parseOctal(buffer, offset, length);
504         }
505         final boolean negative = buffer[offset] == (byte) 0xff;
506         if (length < 9) {
507             return parseBinaryLong(buffer, offset, length, negative);
508         }
509         return parseBinaryBigInteger(buffer, offset, length, negative);
510     }
511 
512     /**
513      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
514      *
515      * <p>
516      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
517      * </p>
518      * <p>
519      * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
520      * {@link #parseFromPAX01SparseHeaders} directly instead.
521      * </p>
522      *
523      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
524      * @return sparse headers parsed from sparse map
525      * @deprecated use #parseFromPAX01SparseHeaders instead
526      */
527     @Deprecated
528     protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
529         try {
530             return parseFromPAX01SparseHeaders(sparseMap);
531         } catch (final IOException ex) {
532             throw new UncheckedIOException(ex.getMessage(), ex);
533         }
534     }
535 
536     /**
537      * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
538      * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
539      * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
540      *
541      * @param inputStream parsing source.
542      * @param recordSize  The size the TAR header
543      * @return sparse headers
544      * @throws IOException if an I/O error occurs.
545      */
546     protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
547         // for 1.X PAX Headers
548         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
549         long bytesRead = 0;
550         long[] readResult = readLineOfNumberForPax1x(inputStream);
551         long sparseHeadersCount = readResult[0];
552         if (sparseHeadersCount < 0) {
553             // overflow while reading number?
554             throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
555         }
556         bytesRead += readResult[1];
557         while (sparseHeadersCount-- > 0) {
558             readResult = readLineOfNumberForPax1x(inputStream);
559             final long sparseOffset = readResult[0];
560             if (sparseOffset < 0) {
561                 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
562             }
563             bytesRead += readResult[1];
564 
565             readResult = readLineOfNumberForPax1x(inputStream);
566             final long sparseNumbytes = readResult[0];
567             if (sparseNumbytes < 0) {
568                 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
569             }
570             bytesRead += readResult[1];
571             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
572         }
573         // skip the rest of this record data
574         final long bytesToSkip = recordSize - bytesRead % recordSize;
575         IOUtils.skip(inputStream, bytesToSkip);
576         return sparseHeaders;
577     }
578 
579     /**
580      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
581      *
582      * <pre>
583      * GNU.sparse.size=size
584      * GNU.sparse.numblocks=numblocks
585      * repeat numblocks times
586      *   GNU.sparse.offset=offset
587      *   GNU.sparse.numbytes=numbytes
588      * end repeat
589      * </pre>
590      * <p>
591      * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
592      * </p>
593      * <p>
594      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
595      * </p>
596      *
597      * @param inputStream      input stream to read keys and values
598      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
599      * @param globalPaxHeaders global PAX headers of the tar archive
600      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
601      * @throws IOException if an I/O error occurs.
602      * @deprecated use the four-arg version instead
603      */
604     @Deprecated
605     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
606             final Map<String, String> globalPaxHeaders) throws IOException {
607         return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
608     }
609 
610     /**
611      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
612      *
613      * <pre>
614      * GNU.sparse.size=size
615      * GNU.sparse.numblocks=numblocks
616      * repeat numblocks times
617      *   GNU.sparse.offset=offset
618      *   GNU.sparse.numbytes=numbytes
619      * end repeat
620      * </pre>
621      * <p>
622      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
623      * </p>
624      * <p>
625      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
626      * </p>
627      *
628      * @param inputStream      input stream to read keys and values
629      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
630      * @param globalPaxHeaders global PAX headers of the tar archive
631      * @param headerSize       total size of the PAX header, will be ignored if negative
632      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
633      * @throws IOException if an I/O error occurs.
634      * @since 1.21
635      */
636     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
637             final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
638         final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
639         Long offset = null;
640         // Format is "length keyword=value\n";
641         int totalRead = 0;
642         while (true) { // get length
643             int ch;
644             int len = 0;
645             int read = 0;
646             while ((ch = inputStream.read()) != -1) {
647                 read++;
648                 totalRead++;
649                 if (ch == '\n') { // blank line in header
650                     break;
651                 }
652                 if (ch == ' ') { // End of length string
653                     // Get keyword
654                     final ByteArrayOutputStream coll = new ByteArrayOutputStream();
655                     while ((ch = inputStream.read()) != -1) {
656                         read++;
657                         totalRead++;
658                         if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
659                             break;
660                         }
661                         if (ch == '=') { // end of keyword
662                             final String keyword = coll.toString(StandardCharsets.UTF_8);
663                             // Get rest of entry
664                             final int restLen = len - read;
665                             if (restLen <= 1) { // only NL
666                                 headers.remove(keyword);
667                             } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
668                                 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
669                             } else {
670                                 final byte[] rest = IOUtils.readRange(inputStream, restLen);
671                                 final int got = rest.length;
672                                 if (got != restLen) {
673                                     throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
674                                 }
675                                 totalRead += restLen;
676                                 // Drop trailing NL
677                                 if (rest[restLen - 1] != '\n') {
678                                     throw new IOException("Failed to read Paxheader.Value should end with a newline");
679                                 }
680                                 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
681                                 headers.put(keyword, value);
682 
683                                 // for 0.0 PAX Headers
684                                 if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
685                                     if (offset != null) {
686                                         // previous GNU.sparse.offset header but no numBytes
687                                         sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
688                                     }
689                                     try {
690                                         offset = Long.valueOf(value);
691                                     } catch (final NumberFormatException ex) {
692                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
693                                     }
694                                     if (offset < 0) {
695                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
696                                     }
697                                 }
698 
699                                 // for 0.0 PAX Headers
700                                 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
701                                     if (offset == null) {
702                                         throw new IOException(
703                                                 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
704                                     }
705                                     final long numbytes = ParsingUtils.parseLongValue(value);
706                                     if (numbytes < 0) {
707                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
708                                     }
709                                     sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
710                                     offset = null;
711                                 }
712                             }
713                             break;
714                         }
715                         coll.write((byte) ch);
716                     }
717                     break; // Processed single header
718                 }
719                 // COMPRESS-530 : throw if we encounter a non-number while reading length
720                 if (ch < '0' || ch > '9') {
721                     throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
722                 }
723                 len *= 10;
724                 len += ch - '0';
725             }
726             if (ch == -1) { // EOF
727                 break;
728             }
729         }
730         if (offset != null) {
731             // offset but no numBytes
732             sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
733         }
734         return headers;
735     }
736 
737     /**
738      * Parses the content of a PAX 1.0 sparse block.
739      *
740      * @param buffer The buffer from which to parse.
741      * @param offset The offset into the buffer from which to parse.
742      * @return a parsed sparse struct
743      * @since 1.20
744      */
745     public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
746         final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
747         final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
748         return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
749     }
750 
751     /**
752      * For 1.x PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
753      * delimited by newlines.
754      *
755      * @param inputStream the input stream of the tar file
756      * @return the decimal number delimited by '\n', and the bytes read from input stream
757      * @throws IOException if an I/O error occurs.
758      */
759     private static long[] readLineOfNumberForPax1x(final InputStream inputStream) throws IOException {
760         int number;
761         long result = 0;
762         long bytesRead = 0;
763         while ((number = inputStream.read()) != '\n') {
764             bytesRead += 1;
765             if (number == -1) {
766                 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
767             }
768             if (number < '0' || number > '9') {
769                 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
770             }
771             result = result * 10 + (number - '0');
772         }
773         bytesRead += 1;
774         return new long[] { result, bytesRead };
775     }
776 
777     /**
778      * @since 1.21
779      */
780     static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
781         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
782         for (int i = 0; i < entries; i++) {
783             try {
784                 final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
785                         offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
786                 if (sparseHeader.getOffset() < 0) {
787                     throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
788                 }
789                 if (sparseHeader.getNumbytes() < 0) {
790                     throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
791                 }
792                 sparseHeaders.add(sparseHeader);
793             } catch (final IllegalArgumentException ex) {
794                 // thrown internally by parseOctalOrBinary
795                 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
796             }
797         }
798         return Collections.unmodifiableList(sparseHeaders);
799     }
800 
801     /**
802      * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
803      * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
804      * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
805      * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
806      * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
807      * <p>
808      * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
809      * well evolve over time as more special cases are encountered.
810      * </p>
811      *
812      * @param header tar header
813      * @return whether the checksum is reasonably good
814      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
815      * @since 1.5
816      */
817     public static boolean verifyCheckSum(final byte[] header) {
818         final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
819         long unsignedSum = 0;
820         long signedSum = 0;
821         for (int i = 0; i < header.length; i++) {
822             byte b = header[i];
823             if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
824                 b = ' ';
825             }
826             unsignedSum += 0xff & b;
827             signedSum += b;
828         }
829         return storedSum == unsignedSum || storedSum == signedSum;
830     }
831 
832     /** Prevents instantiation. */
833     private TarUtils() {
834     }
835 
836 }