View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.tar;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.UncheckedIOException;
24  import java.math.BigInteger;
25  import java.nio.ByteBuffer;
26  import java.nio.charset.Charset;
27  import java.nio.charset.StandardCharsets;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.HashMap;
31  import java.util.List;
32  import java.util.Map;
33  
34  import org.apache.commons.compress.archivers.zip.ZipEncoding;
35  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
36  import org.apache.commons.compress.utils.IOUtils;
37  import org.apache.commons.compress.utils.ParsingUtils;
38  import org.apache.commons.io.output.ByteArrayOutputStream;
39  
40  /**
41   * This class provides static utility methods to work with byte streams.
42   *
43   * @Immutable
44   */
45  // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
46  public class TarUtils {
47  
48      private static final int BYTE_MASK = 255;
49  
50      static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
51  
52      /**
53       * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
54       */
55      static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
56  
57          @Override
58          public boolean canEncode(final String name) {
59              return true;
60          }
61  
62          @Override
63          public String decode(final byte[] buffer) {
64              final int length = buffer.length;
65              final StringBuilder result = new StringBuilder(length);
66              for (final byte b : buffer) {
67                  if (b == 0) { // Trailing null
68                      break;
69                  }
70                  result.append((char) (b & 0xFF)); // Allow for sign-extension
71              }
72              return result.toString();
73          }
74  
75          @Override
76          public ByteBuffer encode(final String name) {
77              final int length = name.length();
78              final byte[] buf = new byte[length];
79              // copy until end of input or output is reached.
80              for (int i = 0; i < length; ++i) {
81                  buf[i] = (byte) name.charAt(i);
82              }
83              return ByteBuffer.wrap(buf);
84          }
85      };
86  
87      /**
88       * Computes the checksum of a tar entry header.
89       *
90       * @param buf The tar entry's header buffer.
91       * @return The computed checksum.
92       */
93      public static long computeCheckSum(final byte[] buf) {
94          long sum = 0;
95          for (final byte element : buf) {
96              sum += BYTE_MASK & element;
97          }
98          return sum;
99      }
100 
101     // Helper method to generate the exception message
102     private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
103         // default charset is good enough for an exception message,
104         //
105         // the alternative was to modify parseOctal and
106         // parseOctalOrBinary to receive the ZipEncoding of the
107         // archive (deprecating the existing public methods, of
108         // course) and dealing with the fact that ZipEncoding#decode
109         // can throw an IOException which parseOctal* doesn't declare
110         String string = new String(buffer, offset, length, Charset.defaultCharset());
111 
112         string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
113         return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
114     }
115 
116     private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
117         final BigInteger val = BigInteger.valueOf(value);
118         final byte[] b = val.toByteArray();
119         final int len = b.length;
120         if (len > length - 1) {
121             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
122         }
123         final int off = offset + length - len;
124         System.arraycopy(b, 0, buf, off, len);
125         final byte fill = (byte) (negative ? 0xff : 0);
126         for (int i = offset + 1; i < off; i++) {
127             buf[i] = fill;
128         }
129     }
130 
131     /**
132      * Writes an octal value into a buffer.
133      *
134      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
135      * space.
136      *
137      * @param value  The value to convert
138      * @param buf    The destination buffer
139      * @param offset The starting offset into the buffer.
140      * @param length The size of the buffer.
141      * @return The updated value of offset, i.e. offset+length
142      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
143      */
144     public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
145         int idx = length - 2; // for NUL and space
146         formatUnsignedOctalString(value, buf, offset, idx);
147         buf[offset + idx++] = 0; // Trailing null
148         buf[offset + idx] = (byte) ' '; // Trailing space
149         return offset + length;
150     }
151 
152     private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
153         final int bits = (length - 1) * 8;
154         final long max = 1L << bits;
155         long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
156         if (val < 0 || val >= max) {
157             throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
158         }
159         if (negative) {
160             val ^= max - 1;
161             val++;
162             val |= 0xffL << bits;
163         }
164         for (int i = offset + length - 1; i >= offset; i--) {
165             buf[i] = (byte) val;
166             val >>= 8;
167         }
168     }
169 
170     /**
171      * Writes an octal long integer into a buffer.
172      *
173      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
174      *
175      * @param value  The value to write as octal
176      * @param buf    The destinationbuffer.
177      * @param offset The starting offset into the buffer.
178      * @param length The length of the buffer
179      * @return The updated offset
180      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
181      */
182     public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
183         final int idx = length - 1; // For space
184         formatUnsignedOctalString(value, buf, offset, idx);
185         buf[offset + idx] = (byte) ' '; // Trailing space
186         return offset + length;
187     }
188 
189     /**
190      * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
191      *
192      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
193      *
194      * @param value  The value to write into the buffer.
195      * @param buf    The destination buffer.
196      * @param offset The starting offset into the buffer.
197      * @param length The length of the buffer.
198      * @return The updated offset.
199      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
200      * @since 1.4
201      */
202     public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
203         // Check whether we are dealing with UID/GID or SIZE field
204         final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
205         final boolean negative = value < 0;
206         if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
207             return formatLongOctalBytes(value, buf, offset, length);
208         }
209         if (length < 9) {
210             formatLongBinary(value, buf, offset, length, negative);
211         } else {
212             formatBigIntegerBinary(value, buf, offset, length, negative);
213         }
214         buf[offset] = (byte) (negative ? 0xff : 0x80);
215         return offset + length;
216     }
217 
218     /**
219      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
220      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
221      *
222      * @param name   The header name from which to copy the characters.
223      * @param buf    The buffer where the name is to be stored.
224      * @param offset The starting offset into the buffer
225      * @param length The maximum number of header bytes to copy.
226      * @return The updated offset, i.e. offset + length
227      */
228     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
229         try {
230             return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
231         } catch (final IOException ex) { // NOSONAR
232             try {
233                 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
234             } catch (final IOException ex2) {
235                 // impossible
236                 throw new UncheckedIOException(ex2); // NOSONAR
237             }
238         }
239     }
240 
241     /**
242      * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
243      * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
244      *
245      * @param name     The header name from which to copy the characters.
246      * @param buf      The buffer where the name is to be stored.
247      * @param offset   The starting offset into the buffer
248      * @param length   The maximum number of header bytes to copy.
249      * @param encoding name of the encoding to use for file names
250      * @since 1.4
251      * @return The updated offset, i.e. offset + length
252      * @throws IOException on error
253      */
254     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
255         int len = name.length();
256         ByteBuffer b = encoding.encode(name);
257         while (b.limit() > length && len > 0) {
258             b = encoding.encode(name.substring(0, --len));
259         }
260         final int limit = b.limit() - b.position();
261         System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
262         // Pad any remaining output bytes with NUL
263         for (int i = limit; i < length; ++i) {
264             buf[offset + i] = 0;
265         }
266         return offset + length;
267     }
268 
269     /**
270      * Writes an octal integer into a buffer.
271      *
272      * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
273      *
274      * @param value  The value to write
275      * @param buf    The buffer to receive the output
276      * @param offset The starting offset into the buffer
277      * @param length The size of the output buffer
278      * @return The updated offset, i.e. offset+length
279      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
280      */
281     public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
282         int idx = length - 2; // For space and trailing null
283         formatUnsignedOctalString(value, buf, offset, idx);
284         buf[offset + idx++] = (byte) ' '; // Trailing space
285         buf[offset + idx] = 0; // Trailing null
286         return offset + length;
287     }
288 
289     /**
290      * Fills a buffer with unsigned octal number, padded with leading zeroes.
291      *
292      * @param value  number to convert to octal - treated as unsigned
293      * @param buffer destination buffer
294      * @param offset starting offset in buffer
295      * @param length length of buffer to fill
296      * @throws IllegalArgumentException if the value will not fit in the buffer
297      */
298     public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
299         int remaining = length;
300         remaining--;
301         if (value == 0) {
302             buffer[offset + remaining--] = (byte) '0';
303         } else {
304             long val = value;
305             for (; remaining >= 0 && val != 0; --remaining) {
306                 // CheckStyle:MagicNumber OFF
307                 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
308                 val = val >>> 3;
309                 // CheckStyle:MagicNumber ON
310             }
311             if (val != 0) {
312                 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
313             }
314         }
315 
316         for (; remaining >= 0; --remaining) { // leading zeros
317             buffer[offset + remaining] = (byte) '0';
318         }
319     }
320 
321     private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
322         final byte[] remainder = new byte[length - 1];
323         System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
324         BigInteger val = new BigInteger(remainder);
325         if (negative) {
326             // 2's complement
327             val = val.add(BigInteger.valueOf(-1)).not();
328         }
329         if (val.bitLength() > 63) {
330             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
331         }
332         return negative ? -val.longValue() : val.longValue();
333     }
334 
335     private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
336         if (length >= 9) {
337             throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
338         }
339         long val = 0;
340         for (int i = 1; i < length; i++) {
341             val = (val << 8) + (buffer[offset + i] & 0xff);
342         }
343         if (negative) {
344             // 2's complement
345             val--;
346             val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
347         }
348         return negative ? -val : val;
349     }
350 
351     /**
352      * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
353      *
354      * @param buffer The buffer from which to parse.
355      * @param offset The offset into the buffer from which to parse.
356      * @return The boolean value of the bytes.
357      * @throws IllegalArgumentException if an invalid byte is detected.
358      */
359     public static boolean parseBoolean(final byte[] buffer, final int offset) {
360         return buffer[offset] == 1;
361     }
362 
363     /**
364      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
365      * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
366      *
367      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
368      * @return unmodifiable list of sparse headers parsed from sparse map
369      * @throws IOException Corrupted TAR archive.
370      * @since 1.21
371      */
372     protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
373         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
374         final String[] sparseHeaderStrings = sparseMap.split(",");
375         if (sparseHeaderStrings.length % 2 == 1) {
376             throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
377         }
378         for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
379             final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
380             if (sparseOffset < 0) {
381                 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
382             }
383             final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
384             if (sparseNumbytes < 0) {
385                 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
386             }
387             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
388         }
389         return Collections.unmodifiableList(sparseHeaders);
390     }
391 
392     /**
393      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
394      *
395      * @param buffer The buffer from which to parse.
396      * @param offset The offset into the buffer from which to parse.
397      * @param length The maximum number of bytes to parse.
398      * @return The entry name.
399      */
400     public static String parseName(final byte[] buffer, final int offset, final int length) {
401         try {
402             return parseName(buffer, offset, length, DEFAULT_ENCODING);
403         } catch (final IOException ex) { // NOSONAR
404             try {
405                 return parseName(buffer, offset, length, FALLBACK_ENCODING);
406             } catch (final IOException ex2) {
407                 // impossible
408                 throw new UncheckedIOException(ex2); // NOSONAR
409             }
410         }
411     }
412 
413     /**
414      * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
415      *
416      * @param buffer   The buffer from which to parse.
417      * @param offset   The offset into the buffer from which to parse.
418      * @param length   The maximum number of bytes to parse.
419      * @param encoding name of the encoding to use for file names
420      * @since 1.4
421      * @return The entry name.
422      * @throws IOException on error
423      */
424     public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
425         int len = 0;
426         for (int i = offset; len < length && buffer[i] != 0; i++) {
427             len++;
428         }
429         if (len > 0) {
430             final byte[] b = new byte[len];
431             System.arraycopy(buffer, offset, b, 0, len);
432             return encoding.decode(b);
433         }
434         return "";
435     }
436 
437     /**
438      * Parses an octal string from a buffer.
439      *
440      * <p>
441      * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
442      * </p>
443      *
444      * <p>
445      * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
446      * </p>
447      *
448      * <p>
449      * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
450      * </p>
451      *
452      * @param buffer The buffer from which to parse.
453      * @param offset The offset into the buffer from which to parse.
454      * @param length The maximum number of bytes to parse - must be at least 2 bytes.
455      * @return The long value of the octal string.
456      * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
457      */
458     public static long parseOctal(final byte[] buffer, final int offset, final int length) {
459         long result = 0;
460         int end = offset + length;
461         int start = offset;
462         if (length < 2) {
463             throw new IllegalArgumentException("Length " + length + " must be at least 2");
464         }
465         if (buffer[start] == 0) {
466             return 0L;
467         }
468         // Skip leading spaces
469         while (start < end) {
470             if (buffer[start] != ' ') {
471                 break;
472             }
473             start++;
474         }
475         // Trim all trailing NULs and spaces.
476         // The ustar and POSIX tar specs require a trailing NUL or
477         // space but some implementations use the extra digit for big
478         // sizes/uids/gids ...
479         byte trailer = buffer[end - 1];
480         while (start < end && (trailer == 0 || trailer == ' ')) {
481             end--;
482             trailer = buffer[end - 1];
483         }
484         for (; start < end; start++) {
485             final byte currentByte = buffer[start];
486             // CheckStyle:MagicNumber OFF
487             if (currentByte < '0' || currentByte > '7') {
488                 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
489             }
490             result = (result << 3) + (currentByte - '0'); // convert from ASCII
491             // CheckStyle:MagicNumber ON
492         }
493         return result;
494     }
495 
496     /**
497      * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
498      * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
499      *
500      * @param buffer The buffer from which to parse.
501      * @param offset The offset into the buffer from which to parse.
502      * @param length The maximum number of bytes to parse.
503      * @return The long value of the octal or binary string.
504      * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
505      *                                  exceed the size of a signed long 64-bit integer.
506      * @since 1.4
507      */
508     public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
509         if ((buffer[offset] & 0x80) == 0) {
510             return parseOctal(buffer, offset, length);
511         }
512         final boolean negative = buffer[offset] == (byte) 0xff;
513         if (length < 9) {
514             return parseBinaryLong(buffer, offset, length, negative);
515         }
516         return parseBinaryBigInteger(buffer, offset, length, negative);
517     }
518 
519     /**
520      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
521      *
522      * <p>
523      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
524      * </p>
525      * <p>
526      * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
527      * {@link #parseFromPAX01SparseHeaders} directly instead.
528      * </p>
529      *
530      * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
531      * @return sparse headers parsed from sparse map
532      * @deprecated use #parseFromPAX01SparseHeaders instead
533      */
534     @Deprecated
535     protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
536         try {
537             return parseFromPAX01SparseHeaders(sparseMap);
538         } catch (final IOException ex) {
539             throw new UncheckedIOException(ex.getMessage(), ex);
540         }
541     }
542 
543     /**
544      * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
545      * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
546      * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
547      *
548      * @param inputStream parsing source.
549      * @param recordSize  The size the TAR header
550      * @return sparse headers
551      * @throws IOException if an I/O error occurs.
552      */
553     protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
554         // for 1.X PAX Headers
555         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
556         long bytesRead = 0;
557         long[] readResult = readLineOfNumberForPax1X(inputStream);
558         long sparseHeadersCount = readResult[0];
559         if (sparseHeadersCount < 0) {
560             // overflow while reading number?
561             throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
562         }
563         bytesRead += readResult[1];
564         while (sparseHeadersCount-- > 0) {
565             readResult = readLineOfNumberForPax1X(inputStream);
566             final long sparseOffset = readResult[0];
567             if (sparseOffset < 0) {
568                 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
569             }
570             bytesRead += readResult[1];
571 
572             readResult = readLineOfNumberForPax1X(inputStream);
573             final long sparseNumbytes = readResult[0];
574             if (sparseNumbytes < 0) {
575                 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
576             }
577             bytesRead += readResult[1];
578             sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
579         }
580         // skip the rest of this record data
581         final long bytesToSkip = recordSize - bytesRead % recordSize;
582         org.apache.commons.io.IOUtils.skip(inputStream, bytesToSkip);
583         return sparseHeaders;
584     }
585 
586     /**
587      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
588      *
589      * <pre>
590      * GNU.sparse.size=size
591      * GNU.sparse.numblocks=numblocks
592      * repeat numblocks times
593      *   GNU.sparse.offset=offset
594      *   GNU.sparse.numbytes=numbytes
595      * end repeat
596      * </pre>
597      * <p>
598      * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
599      * </p>
600      * <p>
601      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
602      * </p>
603      *
604      * @param inputStream      input stream to read keys and values
605      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
606      * @param globalPaxHeaders global PAX headers of the tar archive
607      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
608      * @throws IOException if an I/O error occurs.
609      * @deprecated use the four-arg version instead
610      */
611     @Deprecated
612     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
613             final Map<String, String> globalPaxHeaders) throws IOException {
614         return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
615     }
616 
617     /**
618      * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
619      *
620      * <pre>
621      * GNU.sparse.size=size
622      * GNU.sparse.numblocks=numblocks
623      * repeat numblocks times
624      *   GNU.sparse.offset=offset
625      *   GNU.sparse.numbytes=numbytes
626      * end repeat
627      * </pre>
628      * <p>
629      * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
630      * </p>
631      * <p>
632      * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
633      * </p>
634      *
635      * @param inputStream      input stream to read keys and values
636      * @param sparseHeaders    used in PAX Format 0.0 &amp; 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
637      * @param globalPaxHeaders global PAX headers of the tar archive
638      * @param headerSize       total size of the PAX header, will be ignored if negative
639      * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
640      * @throws IOException if an I/O error occurs.
641      * @since 1.21
642      */
643     protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
644             final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
645         final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
646         Long offset = null;
647         // Format is "length keyword=value\n";
648         int totalRead = 0;
649         while (true) { // get length
650             int ch;
651             int len = 0;
652             int read = 0;
653             while ((ch = inputStream.read()) != -1) {
654                 read++;
655                 totalRead++;
656                 if (ch == '\n') { // blank line in header
657                     break;
658                 }
659                 if (ch == ' ') { // End of length string
660                     // Get keyword
661                     final ByteArrayOutputStream coll = new ByteArrayOutputStream();
662                     while ((ch = inputStream.read()) != -1) {
663                         read++;
664                         totalRead++;
665                         if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
666                             break;
667                         }
668                         if (ch == '=') { // end of keyword
669                             final String keyword = coll.toString(StandardCharsets.UTF_8);
670                             // Get rest of entry
671                             final int restLen = len - read;
672                             if (restLen <= 1) { // only NL
673                                 headers.remove(keyword);
674                             } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
675                                 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
676                             } else {
677                                 final byte[] rest = IOUtils.readRange(inputStream, restLen);
678                                 final int got = rest.length;
679                                 if (got != restLen) {
680                                     throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
681                                 }
682                                 totalRead += restLen;
683                                 // Drop trailing NL
684                                 if (rest[restLen - 1] != '\n') {
685                                     throw new IOException("Failed to read Paxheader." + "Value should end with a newline");
686                                 }
687                                 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
688                                 headers.put(keyword, value);
689 
690                                 // for 0.0 PAX Headers
691                                 if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
692                                     if (offset != null) {
693                                         // previous GNU.sparse.offset header but no numBytes
694                                         sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
695                                     }
696                                     try {
697                                         offset = Long.valueOf(value);
698                                     } catch (final NumberFormatException ex) {
699                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
700                                     }
701                                     if (offset < 0) {
702                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
703                                     }
704                                 }
705 
706                                 // for 0.0 PAX Headers
707                                 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
708                                     if (offset == null) {
709                                         throw new IOException(
710                                                 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
711                                     }
712                                     final long numbytes = ParsingUtils.parseLongValue(value);
713                                     if (numbytes < 0) {
714                                         throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
715                                     }
716                                     sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
717                                     offset = null;
718                                 }
719                             }
720                             break;
721                         }
722                         coll.write((byte) ch);
723                     }
724                     break; // Processed single header
725                 }
726                 // COMPRESS-530 : throw if we encounter a non-number while reading length
727                 if (ch < '0' || ch > '9') {
728                     throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
729                 }
730                 len *= 10;
731                 len += ch - '0';
732             }
733             if (ch == -1) { // EOF
734                 break;
735             }
736         }
737         if (offset != null) {
738             // offset but no numBytes
739             sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
740         }
741         return headers;
742     }
743 
744     /**
745      * Parses the content of a PAX 1.0 sparse block.
746      *
747      * @since 1.20
748      * @param buffer The buffer from which to parse.
749      * @param offset The offset into the buffer from which to parse.
750      * @return a parsed sparse struct
751      */
752     public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
753         final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
754         final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
755         return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
756     }
757 
758     /**
759      * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
760      * delimited by newlines.
761      *
762      * @param inputStream the input stream of the tar file
763      * @return the decimal number delimited by '\n', and the bytes read from input stream
764      * @throws IOException
765      */
766     private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException {
767         int number;
768         long result = 0;
769         long bytesRead = 0;
770         while ((number = inputStream.read()) != '\n') {
771             bytesRead += 1;
772             if (number == -1) {
773                 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
774             }
775             if (number < '0' || number > '9') {
776                 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
777             }
778             result = result * 10 + (number - '0');
779         }
780         bytesRead += 1;
781         return new long[] { result, bytesRead };
782     }
783 
784     /**
785      * @since 1.21
786      */
787     static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
788         final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
789         for (int i = 0; i < entries; i++) {
790             try {
791                 final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
792                         offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
793                 if (sparseHeader.getOffset() < 0) {
794                     throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
795                 }
796                 if (sparseHeader.getNumbytes() < 0) {
797                     throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
798                 }
799                 sparseHeaders.add(sparseHeader);
800             } catch (final IllegalArgumentException ex) {
801                 // thrown internally by parseOctalOrBinary
802                 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
803             }
804         }
805         return Collections.unmodifiableList(sparseHeaders);
806     }
807 
808     /**
809      * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
810      * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
811      * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
812      * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
813      * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
814      * <p>
815      * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
816      * well evolve over time as more special cases are encountered.
817      * </p>
818      *
819      * @param header tar header
820      * @return whether the checksum is reasonably good
821      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
822      * @since 1.5
823      */
824     public static boolean verifyCheckSum(final byte[] header) {
825         final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
826         long unsignedSum = 0;
827         long signedSum = 0;
828         for (int i = 0; i < header.length; i++) {
829             byte b = header[i];
830             if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
831                 b = ' ';
832             }
833             unsignedSum += 0xff & b;
834             signedSum += b;
835         }
836         return storedSum == unsignedSum || storedSum == signedSum;
837     }
838 
839     /** Prevents instantiation. */
840     private TarUtils() {
841     }
842 
843 }