1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.commons.compress.archivers.tar;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.UncheckedIOException;
24 import java.math.BigInteger;
25 import java.nio.ByteBuffer;
26 import java.nio.charset.Charset;
27 import java.nio.charset.StandardCharsets;
28 import java.util.ArrayList;
29 import java.util.Arrays;
30 import java.util.Collections;
31 import java.util.HashMap;
32 import java.util.List;
33 import java.util.Map;
34
35 import org.apache.commons.compress.archivers.zip.ZipEncoding;
36 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
37 import org.apache.commons.compress.utils.IOUtils;
38 import org.apache.commons.compress.utils.ParsingUtils;
39 import org.apache.commons.io.output.ByteArrayOutputStream;
40
41 /**
42 * This class provides static utility methods to work with byte streams.
43 *
44 * @Immutable
45 */
46 // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
47 public class TarUtils {
48
/** The value -1 as a {@link BigInteger}; added (before inverting) when decoding negative big binary numbers. */
private static final BigInteger NEG_1_BIG_INT = BigInteger.valueOf(-1);

/** Mask keeping the low eight bits of a value, i.e. reading a byte as an unsigned quantity. */
private static final int BYTE_MASK = 255;

/** Encoding for the JVM default charset; tried first by the name methods that take no explicit encoding. */
static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
54
55 /**
56 * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
57 */
58 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
59
60 @Override
61 public boolean canEncode(final String name) {
62 return true;
63 }
64
65 @Override
66 public String decode(final byte[] buffer) {
67 final int length = buffer.length;
68 final StringBuilder result = new StringBuilder(length);
69 for (final byte b : buffer) {
70 if (b == 0) { // Trailing null
71 break;
72 }
73 result.append((char) (b & 0xFF)); // Allow for sign-extension
74 }
75 return result.toString();
76 }
77
78 @Override
79 public ByteBuffer encode(final String name) {
80 return ByteBuffer.wrap(name.getBytes(StandardCharsets.US_ASCII));
81 }
82 };
83
84 /**
85 * Computes the checksum of a tar entry header.
86 *
87 * @param buf The tar entry's header buffer.
88 * @return The computed checksum.
89 */
90 public static long computeCheckSum(final byte[] buf) {
91 long sum = 0;
92 for (final byte element : buf) {
93 sum += BYTE_MASK & element;
94 }
95 return sum;
96 }
97
98 /*
99 * Generates an exception message.
100 */
101 private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) {
102 // default charset is good enough for an exception message,
103 //
104 // the alternative was to modify parseOctal and
105 // parseOctalOrBinary to receive the ZipEncoding of the
106 // archive (deprecating the existing public methods, of
107 // course) and dealing with the fact that ZipEncoding#decode
108 // can throw an IOException which parseOctal* doesn't declare
109 String string = new String(buffer, offset, length, Charset.defaultCharset());
110 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed
111 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length;
112 }
113
114 private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
115 final BigInteger val = BigInteger.valueOf(value);
116 final byte[] b = val.toByteArray();
117 final int len = b.length;
118 if (len > length - 1) {
119 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
120 }
121 final int off = offset + length - len;
122 System.arraycopy(b, 0, buf, off, len);
123 Arrays.fill(buf, offset + 1, off, (byte) (negative ? 0xff : 0));
124 }
125
126 /**
127 * Writes an octal value into a buffer.
128 *
129 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then
130 * space.
131 *
132 * @param value The value to convert
133 * @param buf The destination buffer
134 * @param offset The starting offset into the buffer.
135 * @param length The size of the buffer.
136 * @return The updated value of offset, i.e. offset+length
137 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
138 */
139 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
140 int idx = length - 2; // for NUL and space
141 formatUnsignedOctalString(value, buf, offset, idx);
142 buf[offset + idx++] = 0; // Trailing null
143 buf[offset + idx] = (byte) ' '; // Trailing space
144 return offset + length;
145 }
146
147 private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) {
148 final int bits = (length - 1) * 8;
149 final long max = 1L << bits;
150 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
151 if (val < 0 || val >= max) {
152 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field.");
153 }
154 if (negative) {
155 val ^= max - 1;
156 val++;
157 val |= 0xffL << bits;
158 }
159 for (int i = offset + length - 1; i >= offset; i--) {
160 buf[i] = (byte) val;
161 val >>= 8;
162 }
163 }
164
165 /**
166 * Writes an octal long integer into a buffer.
167 *
168 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
169 *
170 * @param value The value to write as octal
171 * @param buf The destinationbuffer.
172 * @param offset The starting offset into the buffer.
173 * @param length The length of the buffer
174 * @return The updated offset
175 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
176 */
177 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
178 final int idx = length - 1; // For space
179 formatUnsignedOctalString(value, buf, offset, idx);
180 buf[offset + idx] = (byte) ' '; // Trailing space
181 return offset + length;
182 }
183
184 /**
185 * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise.
186 *
187 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space.
188 *
189 * @param value The value to write into the buffer.
190 * @param buf The destination buffer.
191 * @param offset The starting offset into the buffer.
192 * @param length The length of the buffer.
193 * @return The updated offset.
194 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer.
195 * @since 1.4
196 */
197 public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) {
198 // Check whether we are dealing with UID/GID or SIZE field
199 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
200 final boolean negative = value < 0;
201 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
202 return formatLongOctalBytes(value, buf, offset, length);
203 }
204 if (length < 9) {
205 formatLongBinary(value, buf, offset, length, negative);
206 } else {
207 formatBigIntegerBinary(value, buf, offset, length, negative);
208 }
209 buf[offset] = (byte) (negative ? 0xff : 0x80);
210 return offset + length;
211 }
212
213 /**
214 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
215 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
216 *
217 * @param name The header name from which to copy the characters.
218 * @param buf The buffer where the name is to be stored.
219 * @param offset The starting offset into the buffer
220 * @param length The maximum number of header bytes to copy.
221 * @return The updated offset, i.e. offset + length
222 */
223 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
224 try {
225 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
226 } catch (final IOException ex) { // NOSONAR
227 try {
228 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
229 } catch (final IOException ex2) {
230 // impossible
231 throw new UncheckedIOException(ex2); // NOSONAR
232 }
233 }
234 }
235
236 /**
237 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the
238 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated.
239 *
240 * @param name The header name from which to copy the characters.
241 * @param buf The buffer where the name is to be stored.
242 * @param offset The starting offset into the buffer
243 * @param length The maximum number of header bytes to copy.
244 * @param encoding name of the encoding to use for file names
245 * @return The updated offset, i.e. offset + length
246 * @throws IOException on error
247 * @since 1.4
248 */
249 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException {
250 int len = name.length();
251 ByteBuffer b = encoding.encode(name);
252 while (b.limit() > length && len > 0) {
253 b = encoding.encode(name.substring(0, --len));
254 }
255 final int limit = b.limit() - b.position();
256 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
257 // Pad any remaining output bytes with NUL
258 Arrays.fill(buf, offset + limit, offset + length, (byte) 0);
259 return offset + length;
260 }
261
262 /**
263 * Writes an octal integer into a buffer.
264 *
265 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL
266 *
267 * @param value The value to write
268 * @param buf The buffer to receive the output
269 * @param offset The starting offset into the buffer
270 * @param length The size of the output buffer
271 * @return The updated offset, i.e. offset+length
272 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
273 */
274 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
275 int idx = length - 2; // For space and trailing null
276 formatUnsignedOctalString(value, buf, offset, idx);
277 buf[offset + idx++] = (byte) ' '; // Trailing space
278 buf[offset + idx] = 0; // Trailing null
279 return offset + length;
280 }
281
282 /**
283 * Fills a buffer with unsigned octal number, padded with leading zeroes.
284 *
285 * @param value number to convert to octal - treated as unsigned
286 * @param buffer destination buffer
287 * @param offset starting offset in buffer
288 * @param length length of buffer to fill
289 * @throws IllegalArgumentException if the value will not fit in the buffer
290 */
291 public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) {
292 int remaining = length;
293 remaining--;
294 if (value == 0) {
295 buffer[offset + remaining--] = (byte) '0';
296 } else {
297 long val = value;
298 for (; remaining >= 0 && val != 0; --remaining) {
299 // CheckStyle:MagicNumber OFF
300 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
301 val = val >>> 3;
302 // CheckStyle:MagicNumber ON
303 }
304 if (val != 0) {
305 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length);
306 }
307 }
308 for (; remaining >= 0; --remaining) { // leading zeros
309 buffer[offset + remaining] = (byte) '0';
310 }
311 Arrays.fill(buffer, offset, offset + remaining + 1, (byte) '0');
312 }
313
314 private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) {
315 final byte[] remainder = new byte[length - 1];
316 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
317 BigInteger val = new BigInteger(remainder);
318 if (negative) {
319 // 2's complement
320 val = val.add(NEG_1_BIG_INT).not();
321 }
322 if (val.bitLength() > 63) {
323 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
324 }
325 return negative ? -val.longValue() : val.longValue();
326 }
327
328 private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) {
329 if (length >= 9) {
330 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value");
331 }
332 long val = 0;
333 for (int i = 1; i < length; i++) {
334 val = (val << 8) + (buffer[offset + i] & 0xff);
335 }
336 if (negative) {
337 // 2's complement
338 val--;
339 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
340 }
341 return negative ? -val : val;
342 }
343
344 /**
345 * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs.
346 *
347 * @param buffer The buffer from which to parse.
348 * @param offset The offset into the buffer from which to parse.
349 * @return The boolean value of the bytes.
350 * @throws IllegalArgumentException if an invalid byte is detected.
351 */
352 public static boolean parseBoolean(final byte[] buffer, final int offset) {
353 return buffer[offset] == 1;
354 }
355
356 /**
357 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string
358 * consisting of comma-separated values "offset,size[,offset-1,size-1...]"
359 *
360 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
361 * @return unmodifiable list of sparse headers parsed from sparse map
362 * @throws IOException Corrupted TAR archive.
363 * @since 1.21
364 */
365 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException {
366 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
367 final String[] sparseHeaderStrings = sparseMap.split(",");
368 if (sparseHeaderStrings.length % 2 == 1) {
369 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
370 }
371 for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
372 final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]);
373 if (sparseOffset < 0) {
374 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value");
375 }
376 final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]);
377 if (sparseNumbytes < 0) {
378 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value");
379 }
380 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
381 }
382 return Collections.unmodifiableList(sparseHeaders);
383 }
384
385 /**
386 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
387 *
388 * @param buffer The buffer from which to parse.
389 * @param offset The offset into the buffer from which to parse.
390 * @param length The maximum number of bytes to parse.
391 * @return The entry name.
392 */
393 public static String parseName(final byte[] buffer, final int offset, final int length) {
394 try {
395 return parseName(buffer, offset, length, DEFAULT_ENCODING);
396 } catch (final IOException ex) { // NOSONAR
397 try {
398 return parseName(buffer, offset, length, FALLBACK_ENCODING);
399 } catch (final IOException ex2) {
400 // impossible
401 throw new UncheckedIOException(ex2); // NOSONAR
402 }
403 }
404 }
405
406 /**
407 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached.
408 *
409 * @param buffer The buffer from which to parse.
410 * @param offset The offset into the buffer from which to parse.
411 * @param length The maximum number of bytes to parse.
412 * @param encoding name of the encoding to use for file names
413 * @return The entry name.
414 * @throws IOException on error
415 * @since 1.4
416 */
417 public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException {
418 int len = 0;
419 for (int i = offset; len < length && buffer[i] != 0; i++) {
420 len++;
421 }
422 if (len > 0) {
423 final byte[] b = new byte[len];
424 System.arraycopy(buffer, offset, b, 0, len);
425 return encoding.decode(b);
426 }
427 return "";
428 }
429
430 /**
431 * Parses an octal string from a buffer.
432 *
433 * <p>
434 * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL.
435 * </p>
436 *
437 * <p>
438 * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields).
439 * </p>
440 *
441 * <p>
442 * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4.
443 * </p>
444 *
445 * @param buffer The buffer from which to parse.
446 * @param offset The offset into the buffer from which to parse.
447 * @param length The maximum number of bytes to parse - must be at least 2 bytes.
448 * @return The long value of the octal string.
449 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected.
450 */
451 public static long parseOctal(final byte[] buffer, final int offset, final int length) {
452 long result = 0;
453 int end = offset + length;
454 int start = offset;
455 if (length < 2) {
456 throw new IllegalArgumentException("Length " + length + " must be at least 2");
457 }
458 if (buffer[start] == 0) {
459 return 0L;
460 }
461 // Skip leading spaces
462 while (start < end) {
463 if (buffer[start] != ' ') {
464 break;
465 }
466 start++;
467 }
468 // Trim all trailing NULs and spaces.
469 // The ustar and POSIX tar specs require a trailing NUL or
470 // space but some implementations use the extra digit for big
471 // sizes/uids/gids ...
472 byte trailer = buffer[end - 1];
473 while (start < end && (trailer == 0 || trailer == ' ')) {
474 end--;
475 trailer = buffer[end - 1];
476 }
477 for (; start < end; start++) {
478 final byte currentByte = buffer[start];
479 // CheckStyle:MagicNumber OFF
480 if (currentByte < '0' || currentByte > '7') {
481 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte));
482 }
483 result = (result << 3) + (currentByte - '0'); // convert from ASCII
484 // CheckStyle:MagicNumber ON
485 }
486 return result;
487 }
488
489 /**
490 * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of
491 * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above.
492 *
493 * @param buffer The buffer from which to parse.
494 * @param offset The offset into the buffer from which to parse.
495 * @param length The maximum number of bytes to parse.
496 * @return The long value of the octal or binary string.
497 * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would
498 * exceed the size of a signed long 64-bit integer.
499 * @since 1.4
500 */
501 public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) {
502 if ((buffer[offset] & 0x80) == 0) {
503 return parseOctal(buffer, offset, length);
504 }
505 final boolean negative = buffer[offset] == (byte) 0xff;
506 if (length < 9) {
507 return parseBinaryLong(buffer, offset, length, negative);
508 }
509 return parseBinaryBigInteger(buffer, offset, length, negative);
510 }
511
512 /**
513 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
514 *
515 * <p>
516 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
517 * </p>
518 * <p>
519 * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use
520 * {@link #parseFromPAX01SparseHeaders} directly instead.
521 * </p>
522 *
523 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
524 * @return sparse headers parsed from sparse map
525 * @deprecated use #parseFromPAX01SparseHeaders instead
526 */
527 @Deprecated
528 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) {
529 try {
530 return parseFromPAX01SparseHeaders(sparseMap);
531 } catch (final IOException ex) {
532 throw new UncheckedIOException(ex.getMessage(), ex);
533 }
534 }
535
536 /**
537 * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
538 * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are
539 * map entries, each one consisting of two numbers giving the offset and size of the data block it describes.
540 *
541 * @param inputStream parsing source.
542 * @param recordSize The size the TAR header
543 * @return sparse headers
544 * @throws IOException if an I/O error occurs.
545 */
546 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException {
547 // for 1.X PAX Headers
548 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
549 long bytesRead = 0;
550 long[] readResult = readLineOfNumberForPax1x(inputStream);
551 long sparseHeadersCount = readResult[0];
552 if (sparseHeadersCount < 0) {
553 // overflow while reading number?
554 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block");
555 }
556 bytesRead += readResult[1];
557 while (sparseHeadersCount-- > 0) {
558 readResult = readLineOfNumberForPax1x(inputStream);
559 final long sparseOffset = readResult[0];
560 if (sparseOffset < 0) {
561 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value");
562 }
563 bytesRead += readResult[1];
564
565 readResult = readLineOfNumberForPax1x(inputStream);
566 final long sparseNumbytes = readResult[0];
567 if (sparseNumbytes < 0) {
568 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value");
569 }
570 bytesRead += readResult[1];
571 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
572 }
573 // skip the rest of this record data
574 final long bytesToSkip = recordSize - bytesRead % recordSize;
575 IOUtils.skip(inputStream, bytesToSkip);
576 return sparseHeaders;
577 }
578
579 /**
580 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
581 *
582 * <pre>
583 * GNU.sparse.size=size
584 * GNU.sparse.numblocks=numblocks
585 * repeat numblocks times
586 * GNU.sparse.offset=offset
587 * GNU.sparse.numbytes=numbytes
588 * end repeat
589 * </pre>
590 * <p>
591 * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
592 * </p>
593 * <p>
594 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
595 * </p>
596 *
597 * @param inputStream input stream to read keys and values
598 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
599 * @param globalPaxHeaders global PAX headers of the tar archive
600 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
601 * @throws IOException if an I/O error occurs.
602 * @deprecated use the four-arg version instead
603 */
604 @Deprecated
605 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
606 final Map<String, String> globalPaxHeaders) throws IOException {
607 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1);
608 }
609
/**
 * Parses the records of a PAX header, each of the form {@code "length keyword=value\n"}, into a map that starts out as a copy of the global PAX headers.
 * <p>
 * A record whose value is empty removes the keyword from the result. Reading stops at the end of the stream.
 * </p>
 * <p>
 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like:
 * </p>
 *
 * <pre>
 * GNU.sparse.size=size
 * GNU.sparse.numblocks=numblocks
 * repeat numblocks times
 *   GNU.sparse.offset=offset
 *   GNU.sparse.numbytes=numbytes
 * end repeat
 * </pre>
 * <p>
 * Each offset/numbytes pair is appended to {@code sparseHeaders}; an offset that is not followed by a numbytes record is appended with a size of 0.
 * </p>
 * <p>
 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
 * </p>
 * <p>
 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
 * </p>
 *
 * @param inputStream      input stream to read keys and values
 * @param sparseHeaders    used in PAX Format 0.0 and 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map
 * @param globalPaxHeaders global PAX headers of the tar archive
 * @param headerSize       total size of the PAX header, will be ignored if negative
 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry.
 * @throws IOException if an I/O error occurs, or if a record is malformed (non-numeric length, value larger than the header, missing trailing newline,
 *                     invalid sparse offset/numbytes).
 * @since 1.21
 */
protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders,
        final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException {
    // Local headers are layered on top of a copy of the global ones
    final Map<String, String> headers = new HashMap<>(globalPaxHeaders);
    // Pending GNU.sparse.offset value that still waits for its GNU.sparse.numbytes
    Long offset = null;
    // Format is "length keyword=value\n";
    // Number of bytes consumed from the stream over all records
    int totalRead = 0;
    while (true) { // get length
        int ch;
        // Record length as announced by the decimal prefix
        int len = 0;
        // Number of bytes consumed for the current record so far
        int read = 0;
        while ((ch = inputStream.read()) != -1) {
            read++;
            totalRead++;
            if (ch == '\n') { // blank line in header
                break;
            }
            if (ch == ' ') { // End of length string
                // Get keyword
                final ByteArrayOutputStream coll = new ByteArrayOutputStream();
                while ((ch = inputStream.read()) != -1) {
                    read++;
                    totalRead++;
                    // Stop once the declared header size is used up; the "< 0" test
                    // presumably guards against the int counter overflowing
                    if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) {
                        break;
                    }
                    if (ch == '=') { // end of keyword
                        final String keyword = coll.toString(StandardCharsets.UTF_8);
                        // Get rest of entry
                        // What remains of the record after "length keyword=": the value plus its newline
                        final int restLen = len - read;
                        if (restLen <= 1) { // only NL
                            headers.remove(keyword);
                        } else if (headerSize >= 0 && restLen > headerSize - totalRead) {
                            throw new IOException("Paxheader value size " + restLen + " exceeds size of header record");
                        } else {
                            final byte[] rest = IOUtils.readRange(inputStream, restLen);
                            final int got = rest.length;
                            if (got != restLen) {
                                throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got);
                            }
                            totalRead += restLen;
                            // Drop trailing NL
                            if (rest[restLen - 1] != '\n') {
                                throw new IOException("Failed to read Paxheader.Value should end with a newline");
                            }
                            final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8);
                            headers.put(keyword, value);

                            // for 0.0 PAX Headers
                            if (keyword.equals(TarGnuSparseKeys.OFFSET)) {
                                if (offset != null) {
                                    // previous GNU.sparse.offset header but no numBytes
                                    sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
                                }
                                try {
                                    offset = Long.valueOf(value);
                                } catch (final NumberFormatException ex) {
                                    throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value");
                                }
                                if (offset < 0) {
                                    throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value");
                                }
                            }

                            // for 0.0 PAX Headers
                            if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) {
                                if (offset == null) {
                                    throw new IOException(
                                            "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up.");
                                }
                                final long numbytes = ParsingUtils.parseLongValue(value);
                                if (numbytes < 0) {
                                    throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value");
                                }
                                sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes));
                                // The pair is complete; wait for the next offset
                                offset = null;
                            }
                        }
                        break;
                    }
                    // Still inside the keyword: collect the byte
                    coll.write((byte) ch);
                }
                break; // Processed single header
            }
            // COMPRESS-530 : throw if we encounter a non-number while reading length
            if (ch < '0' || ch > '9') {
                throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length");
            }
            // Accumulate the next decimal digit of the record length
            len *= 10;
            len += ch - '0';
        }
        if (ch == -1) { // EOF
            break;
        }
    }
    if (offset != null) {
        // offset but no numBytes
        sparseHeaders.add(new TarArchiveStructSparse(offset, 0));
    }
    return headers;
}
736
737 /**
738 * Parses the content of a PAX 1.0 sparse block.
739 *
740 * @param buffer The buffer from which to parse.
741 * @param offset The offset into the buffer from which to parse.
742 * @return a parsed sparse struct
743 * @since 1.20
744 */
745 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
746 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN);
747 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN);
748 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
749 }
750
751 /**
752 * For 1.x PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers
753 * delimited by newlines.
754 *
755 * @param inputStream the input stream of the tar file
756 * @return the decimal number delimited by '\n', and the bytes read from input stream
757 * @throws IOException if an I/O error occurs.
758 */
759 private static long[] readLineOfNumberForPax1x(final InputStream inputStream) throws IOException {
760 int number;
761 long result = 0;
762 long bytesRead = 0;
763 while ((number = inputStream.read()) != '\n') {
764 bytesRead += 1;
765 if (number == -1) {
766 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format");
767 }
768 if (number < '0' || number > '9') {
769 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block");
770 }
771 result = result * 10 + (number - '0');
772 }
773 bytesRead += 1;
774 return new long[] { result, bytesRead };
775 }
776
777 /**
778 * @since 1.21
779 */
780 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException {
781 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
782 for (int i = 0; i < entries; i++) {
783 try {
784 final TarArchiveStructSparse sparseHeader = parseSparse(buffer,
785 offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN));
786 if (sparseHeader.getOffset() < 0) {
787 throw new IOException("Corrupted TAR archive, sparse entry with negative offset");
788 }
789 if (sparseHeader.getNumbytes() < 0) {
790 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes");
791 }
792 sparseHeaders.add(sparseHeader);
793 } catch (final IllegalArgumentException ex) {
794 // thrown internally by parseOctalOrBinary
795 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex);
796 }
797 }
798 return Collections.unmodifiableList(sparseHeaders);
799 }
800
801 /**
802 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the
803 * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal
804 * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore
805 * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations
806 * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote>
807 * <p>
808 * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may
809 * well evolve over time as more special cases are encountered.
810 * </p>
811 *
812 * @param header tar header
813 * @return whether the checksum is reasonably good
814 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
815 * @since 1.5
816 */
817 public static boolean verifyCheckSum(final byte[] header) {
818 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
819 long unsignedSum = 0;
820 long signedSum = 0;
821 for (int i = 0; i < header.length; i++) {
822 byte b = header[i];
823 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) {
824 b = ' ';
825 }
826 unsignedSum += 0xff & b;
827 signedSum += b;
828 }
829 return storedSum == unsignedSum || storedSum == signedSum;
830 }
831
/** Prevents instantiation: this class only offers static members. */
private TarUtils() {
}
835
836 }