001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.UncheckedIOException; 024import java.math.BigInteger; 025import java.nio.ByteBuffer; 026import java.nio.charset.Charset; 027import java.nio.charset.StandardCharsets; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Collections; 031import java.util.HashMap; 032import java.util.List; 033import java.util.Map; 034 035import org.apache.commons.compress.archivers.zip.ZipEncoding; 036import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 037import org.apache.commons.compress.utils.IOUtils; 038import org.apache.commons.compress.utils.ParsingUtils; 039import org.apache.commons.io.output.ByteArrayOutputStream; 040 041/** 042 * This class provides static utility methods to work with byte streams. 043 * 044 * @Immutable 045 */ 046// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 047public class TarUtils { 048 049 private static final BigInteger NEG_1_BIG_INT = BigInteger.valueOf(-1); 050 051 private static final int BYTE_MASK = 255; 052 053 static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset()); 054 055 /** 056 * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding. 057 */ 058 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 059 060 @Override 061 public boolean canEncode(final String name) { 062 return true; 063 } 064 065 @Override 066 public String decode(final byte[] buffer) { 067 final int length = buffer.length; 068 final StringBuilder result = new StringBuilder(length); 069 for (final byte b : buffer) { 070 if (b == 0) { // Trailing null 071 break; 072 } 073 result.append((char) (b & 0xFF)); // Allow for sign-extension 074 } 075 return result.toString(); 076 } 077 078 @Override 079 public ByteBuffer encode(final String name) { 080 return ByteBuffer.wrap(name.getBytes(StandardCharsets.US_ASCII)); 081 } 082 }; 083 084 /** 085 * Computes the checksum of a tar entry header. 086 * 087 * @param buf The tar entry's header buffer. 088 * @return The computed checksum. 089 */ 090 public static long computeCheckSum(final byte[] buf) { 091 long sum = 0; 092 for (final byte element : buf) { 093 sum += BYTE_MASK & element; 094 } 095 return sum; 096 } 097 098 /* 099 * Generates an exception message. 100 */ 101 private static String exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte) { 102 // default charset is good enough for an exception message, 103 // 104 // the alternative was to modify parseOctal and 105 // parseOctalOrBinary to receive the ZipEncoding of the 106 // archive (deprecating the existing public methods, of 107 // course) and dealing with the fact that ZipEncoding#decode 108 // can throw an IOException which parseOctal* doesn't declare 109 String string = new String(buffer, offset, length, Charset.defaultCharset()); 110 string = string.replace("\0", "{NUL}"); // Replace NULs to allow string to be printed 111 return "Invalid byte " + currentByte + " at offset " + (current - offset) + " in '" + string + "' len=" + length; 112 } 113 114 private static void formatBigIntegerBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 115 final BigInteger val = BigInteger.valueOf(value); 116 final byte[] b = val.toByteArray(); 117 final int len = b.length; 118 if (len > length - 1) { 119 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 120 } 121 final int off = offset + length - len; 122 System.arraycopy(b, 0, buf, off, len); 123 Arrays.fill(buf, offset + 1, off, (byte) (negative ? 0xff : 0)); 124 } 125 126 /** 127 * Writes an octal value into a buffer. 128 * 129 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by NUL and then 130 * space. 131 * 132 * @param value The value to convert 133 * @param buf The destination buffer 134 * @param offset The starting offset into the buffer. 135 * @param length The size of the buffer. 136 * @return The updated value of offset, i.e. offset+length 137 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 138 */ 139 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 140 int idx = length - 2; // for NUL and space 141 formatUnsignedOctalString(value, buf, offset, idx); 142 buf[offset + idx++] = 0; // Trailing null 143 buf[offset + idx] = (byte) ' '; // Trailing space 144 return offset + length; 145 } 146 147 private static void formatLongBinary(final long value, final byte[] buf, final int offset, final int length, final boolean negative) { 148 final int bits = (length - 1) * 8; 149 final long max = 1L << bits; 150 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 151 if (val < 0 || val >= max) { 152 throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); 153 } 154 if (negative) { 155 val ^= max - 1; 156 val++; 157 val |= 0xffL << bits; 158 } 159 for (int i = offset + length - 1; i >= offset; i--) { 160 buf[i] = (byte) val; 161 val >>= 8; 162 } 163 } 164 165 /** 166 * Writes an octal long integer into a buffer. 167 * 168 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 169 * 170 * @param value The value to write as octal 171 * @param buf The destinationbuffer. 172 * @param offset The starting offset into the buffer. 173 * @param length The length of the buffer 174 * @return The updated offset 175 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 176 */ 177 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 178 final int idx = length - 1; // For space 179 formatUnsignedOctalString(value, buf, offset, idx); 180 buf[offset + idx] = (byte) ' '; // Trailing space 181 return offset + length; 182 } 183 184 /** 185 * Writes a long integer into a buffer as an octal string if this will fit, or as a binary number otherwise. 186 * 187 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by a space. 188 * 189 * @param value The value to write into the buffer. 190 * @param buf The destination buffer. 191 * @param offset The starting offset into the buffer. 192 * @param length The length of the buffer. 193 * @return The updated offset. 194 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer. 195 * @since 1.4 196 */ 197 public static int formatLongOctalOrBinaryBytes(final long value, final byte[] buf, final int offset, final int length) { 198 // Check whether we are dealing with UID/GID or SIZE field 199 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 200 final boolean negative = value < 0; 201 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 202 return formatLongOctalBytes(value, buf, offset, length); 203 } 204 if (length < 9) { 205 formatLongBinary(value, buf, offset, length, negative); 206 } else { 207 formatBigIntegerBinary(value, buf, offset, length, negative); 208 } 209 buf[offset] = (byte) (negative ? 0xff : 0x80); 210 return offset + length; 211 } 212 213 /** 214 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 215 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 216 * 217 * @param name The header name from which to copy the characters. 218 * @param buf The buffer where the name is to be stored. 219 * @param offset The starting offset into the buffer 220 * @param length The maximum number of header bytes to copy. 221 * @return The updated offset, i.e. offset + length 222 */ 223 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 224 try { 225 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 226 } catch (final IOException ex) { // NOSONAR 227 try { 228 return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING); 229 } catch (final IOException ex2) { 230 // impossible 231 throw new UncheckedIOException(ex2); // NOSONAR 232 } 233 } 234 } 235 236 /** 237 * Copies a name into a buffer. Copies characters from the name into the buffer starting at the specified offset. If the buffer is longer than the name, the 238 * buffer is filled with trailing NULs. If the name is longer than the buffer, the output is truncated. 239 * 240 * @param name The header name from which to copy the characters. 241 * @param buf The buffer where the name is to be stored. 242 * @param offset The starting offset into the buffer 243 * @param length The maximum number of header bytes to copy. 244 * @param encoding name of the encoding to use for file names 245 * @return The updated offset, i.e. offset + length 246 * @throws IOException on error 247 * @since 1.4 248 */ 249 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException { 250 int len = name.length(); 251 ByteBuffer b = encoding.encode(name); 252 while (b.limit() > length && len > 0) { 253 b = encoding.encode(name.substring(0, --len)); 254 } 255 final int limit = b.limit() - b.position(); 256 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 257 // Pad any remaining output bytes with NUL 258 Arrays.fill(buf, offset + limit, offset + length, (byte) 0); 259 return offset + length; 260 } 261 262 /** 263 * Writes an octal integer into a buffer. 264 * 265 * Uses {@link #formatUnsignedOctalString} to format the value as an octal string with leading zeros. The converted number is followed by space and NUL 266 * 267 * @param value The value to write 268 * @param buf The buffer to receive the output 269 * @param offset The starting offset into the buffer 270 * @param length The size of the output buffer 271 * @return The updated offset, i.e. offset+length 272 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 273 */ 274 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 275 int idx = length - 2; // For space and trailing null 276 formatUnsignedOctalString(value, buf, offset, idx); 277 buf[offset + idx++] = (byte) ' '; // Trailing space 278 buf[offset + idx] = 0; // Trailing null 279 return offset + length; 280 } 281 282 /** 283 * Fills a buffer with unsigned octal number, padded with leading zeroes. 284 * 285 * @param value number to convert to octal - treated as unsigned 286 * @param buffer destination buffer 287 * @param offset starting offset in buffer 288 * @param length length of buffer to fill 289 * @throws IllegalArgumentException if the value will not fit in the buffer 290 */ 291 public static void formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length) { 292 int remaining = length; 293 remaining--; 294 if (value == 0) { 295 buffer[offset + remaining--] = (byte) '0'; 296 } else { 297 long val = value; 298 for (; remaining >= 0 && val != 0; --remaining) { 299 // CheckStyle:MagicNumber OFF 300 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 301 val = val >>> 3; 302 // CheckStyle:MagicNumber ON 303 } 304 if (val != 0) { 305 throw new IllegalArgumentException(value + "=" + Long.toOctalString(value) + " will not fit in octal number buffer of length " + length); 306 } 307 } 308 for (; remaining >= 0; --remaining) { // leading zeros 309 buffer[offset + remaining] = (byte) '0'; 310 } 311 Arrays.fill(buffer, offset, offset + remaining + 1, (byte) '0'); 312 } 313 314 private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) { 315 final byte[] remainder = new byte[length - 1]; 316 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 317 BigInteger val = new BigInteger(remainder); 318 if (negative) { 319 // 2's complement 320 val = val.add(NEG_1_BIG_INT).not(); 321 } 322 if (val.bitLength() > 63) { 323 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value"); 324 } 325 return negative ? -val.longValue() : val.longValue(); 326 } 327 328 private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) { 329 if (length >= 9) { 330 throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number exceeds maximum signed long value"); 331 } 332 long val = 0; 333 for (int i = 1; i < length; i++) { 334 val = (val << 8) + (buffer[offset + i] & 0xff); 335 } 336 if (negative) { 337 // 2's complement 338 val--; 339 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 340 } 341 return negative ? -val : val; 342 } 343 344 /** 345 * Parses a boolean byte from a buffer. Leading spaces and NUL are ignored. The buffer may contain trailing spaces or NULs. 346 * 347 * @param buffer The buffer from which to parse. 348 * @param offset The offset into the buffer from which to parse. 349 * @return The boolean value of the bytes. 350 * @throws IllegalArgumentException if an invalid byte is detected. 351 */ 352 public static boolean parseBoolean(final byte[] buffer, final int offset) { 353 return buffer[offset] == 1; 354 } 355 356 /** 357 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map GNU.sparse.map Map of non-null data chunks. It is a string 358 * consisting of comma-separated values "offset,size[,offset-1,size-1...]" 359 * 360 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 361 * @return unmodifiable list of sparse headers parsed from sparse map 362 * @throws IOException Corrupted TAR archive. 363 * @since 1.21 364 */ 365 protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap) throws IOException { 366 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 367 final String[] sparseHeaderStrings = sparseMap.split(","); 368 if (sparseHeaderStrings.length % 2 == 1) { 369 throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header"); 370 } 371 for (int i = 0; i < sparseHeaderStrings.length; i += 2) { 372 final long sparseOffset = ParsingUtils.parseLongValue(sparseHeaderStrings[i]); 373 if (sparseOffset < 0) { 374 throw new IOException("Corrupted TAR archive. Sparse struct offset contains negative value"); 375 } 376 final long sparseNumbytes = ParsingUtils.parseLongValue(sparseHeaderStrings[i + 1]); 377 if (sparseNumbytes < 0) { 378 throw new IOException("Corrupted TAR archive. Sparse struct numbytes contains negative value"); 379 } 380 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 381 } 382 return Collections.unmodifiableList(sparseHeaders); 383 } 384 385 /** 386 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 387 * 388 * @param buffer The buffer from which to parse. 389 * @param offset The offset into the buffer from which to parse. 390 * @param length The maximum number of bytes to parse. 391 * @return The entry name. 392 */ 393 public static String parseName(final byte[] buffer, final int offset, final int length) { 394 try { 395 return parseName(buffer, offset, length, DEFAULT_ENCODING); 396 } catch (final IOException ex) { // NOSONAR 397 try { 398 return parseName(buffer, offset, length, FALLBACK_ENCODING); 399 } catch (final IOException ex2) { 400 // impossible 401 throw new UncheckedIOException(ex2); // NOSONAR 402 } 403 } 404 } 405 406 /** 407 * Parses an entry name from a buffer. Parsing stops when a NUL is found or the buffer length is reached. 408 * 409 * @param buffer The buffer from which to parse. 410 * @param offset The offset into the buffer from which to parse. 411 * @param length The maximum number of bytes to parse. 412 * @param encoding name of the encoding to use for file names 413 * @return The entry name. 414 * @throws IOException on error 415 * @since 1.4 416 */ 417 public static String parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException { 418 int len = 0; 419 for (int i = offset; len < length && buffer[i] != 0; i++) { 420 len++; 421 } 422 if (len > 0) { 423 final byte[] b = new byte[len]; 424 System.arraycopy(buffer, offset, b, 0, len); 425 return encoding.decode(b); 426 } 427 return ""; 428 } 429 430 /** 431 * Parses an octal string from a buffer. 432 * 433 * <p> 434 * Leading spaces are ignored. The buffer must contain a trailing space or NUL, and may contain an additional trailing space or NUL. 435 * </p> 436 * 437 * <p> 438 * The input buffer is allowed to contain all NULs, in which case the method returns 0L (this allows for missing fields). 439 * </p> 440 * 441 * <p> 442 * To work-around some tar implementations that insert a leading NUL this method returns 0 if it detects a leading NUL since Commons Compress 1.4. 443 * </p> 444 * 445 * @param buffer The buffer from which to parse. 446 * @param offset The offset into the buffer from which to parse. 447 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 448 * @return The long value of the octal string. 449 * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. 450 */ 451 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 452 long result = 0; 453 int end = offset + length; 454 int start = offset; 455 if (length < 2) { 456 throw new IllegalArgumentException("Length " + length + " must be at least 2"); 457 } 458 if (buffer[start] == 0) { 459 return 0L; 460 } 461 // Skip leading spaces 462 while (start < end) { 463 if (buffer[start] != ' ') { 464 break; 465 } 466 start++; 467 } 468 // Trim all trailing NULs and spaces. 469 // The ustar and POSIX tar specs require a trailing NUL or 470 // space but some implementations use the extra digit for big 471 // sizes/uids/gids ... 472 byte trailer = buffer[end - 1]; 473 while (start < end && (trailer == 0 || trailer == ' ')) { 474 end--; 475 trailer = buffer[end - 1]; 476 } 477 for (; start < end; start++) { 478 final byte currentByte = buffer[start]; 479 // CheckStyle:MagicNumber OFF 480 if (currentByte < '0' || currentByte > '7') { 481 throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); 482 } 483 result = (result << 3) + (currentByte - '0'); // convert from ASCII 484 // CheckStyle:MagicNumber ON 485 } 486 return result; 487 } 488 489 /** 490 * Computes the value contained in a byte buffer. If the most significant bit of the first byte in the buffer is set, this bit is ignored and the rest of 491 * the buffer is interpreted as a binary number. Otherwise, the buffer is interpreted as an octal number as per the parseOctal function above. 492 * 493 * @param buffer The buffer from which to parse. 494 * @param offset The offset into the buffer from which to parse. 495 * @param length The maximum number of bytes to parse. 496 * @return The long value of the octal or binary string. 497 * @throws IllegalArgumentException if the trailing space/NUL is missing or an invalid byte is detected in an octal number, or if a binary number would 498 * exceed the size of a signed long 64-bit integer. 499 * @since 1.4 500 */ 501 public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { 502 if ((buffer[offset] & 0x80) == 0) { 503 return parseOctal(buffer, offset, length); 504 } 505 final boolean negative = buffer[offset] == (byte) 0xff; 506 if (length < 9) { 507 return parseBinaryLong(buffer, offset, length, negative); 508 } 509 return parseBinaryBigInteger(buffer, offset, length, negative); 510 } 511 512 /** 513 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 514 * 515 * <p> 516 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 517 * </p> 518 * <p> 519 * Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You should use 520 * {@link #parseFromPAX01SparseHeaders} directly instead. 521 * </p> 522 * 523 * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 524 * @return sparse headers parsed from sparse map 525 * @deprecated use #parseFromPAX01SparseHeaders instead 526 */ 527 @Deprecated 528 protected static List<TarArchiveStructSparse> parsePAX01SparseHeaders(final String sparseMap) { 529 try { 530 return parseFromPAX01SparseHeaders(sparseMap); 531 } catch (final IOException ex) { 532 throw new UncheckedIOException(ex.getMessage(), ex); 533 } 534 } 535 536 /** 537 * For PAX Format 1.X: The sparse map itself is stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 538 * delimited by newlines. The map is padded with nulls to the nearest block boundary. The first number gives the number of entries in the map. Following are 539 * map entries, each one consisting of two numbers giving the offset and size of the data block it describes. 540 * 541 * @param inputStream parsing source. 542 * @param recordSize The size the TAR header 543 * @return sparse headers 544 * @throws IOException if an I/O error occurs. 545 */ 546 protected static List<TarArchiveStructSparse> parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { 547 // for 1.X PAX Headers 548 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 549 long bytesRead = 0; 550 long[] readResult = readLineOfNumberForPax1x(inputStream); 551 long sparseHeadersCount = readResult[0]; 552 if (sparseHeadersCount < 0) { 553 // overflow while reading number? 554 throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); 555 } 556 bytesRead += readResult[1]; 557 while (sparseHeadersCount-- > 0) { 558 readResult = readLineOfNumberForPax1x(inputStream); 559 final long sparseOffset = readResult[0]; 560 if (sparseOffset < 0) { 561 throw new IOException("Corrupted TAR archive. Sparse header block offset contains negative value"); 562 } 563 bytesRead += readResult[1]; 564 565 readResult = readLineOfNumberForPax1x(inputStream); 566 final long sparseNumbytes = readResult[0]; 567 if (sparseNumbytes < 0) { 568 throw new IOException("Corrupted TAR archive. Sparse header block numbytes contains negative value"); 569 } 570 bytesRead += readResult[1]; 571 sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); 572 } 573 // skip the rest of this record data 574 final long bytesToSkip = recordSize - bytesRead % recordSize; 575 IOUtils.skip(inputStream, bytesToSkip); 576 return sparseHeaders; 577 } 578 579 /** 580 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 581 * 582 * <pre> 583 * GNU.sparse.size=size 584 * GNU.sparse.numblocks=numblocks 585 * repeat numblocks times 586 * GNU.sparse.offset=offset 587 * GNU.sparse.numbytes=numbytes 588 * end repeat 589 * </pre> 590 * <p> 591 * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map 592 * </p> 593 * <p> 594 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 595 * </p> 596 * 597 * @param inputStream input stream to read keys and values 598 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 599 * @param globalPaxHeaders global PAX headers of the tar archive 600 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 601 * @throws IOException if an I/O error occurs. 602 * @deprecated use the four-arg version instead 603 */ 604 @Deprecated 605 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 606 final Map<String, String> globalPaxHeaders) throws IOException { 607 return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); 608 } 609 610 /** 611 * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) may appear multi times, and they look like: 612 * 613 * <pre> 614 * GNU.sparse.size=size 615 * GNU.sparse.numblocks=numblocks 616 * repeat numblocks times 617 * GNU.sparse.offset=offset 618 * GNU.sparse.numbytes=numbytes 619 * end repeat 620 * </pre> 621 * <p> 622 * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map 623 * </p> 624 * <p> 625 * <em>GNU.sparse.map</em>: Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" 626 * </p> 627 * 628 * @param inputStream input stream to read keys and values 629 * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, the sparse headers need to be stored in an array, not a map 630 * @param globalPaxHeaders global PAX headers of the tar archive 631 * @param headerSize total size of the PAX header, will be ignored if negative 632 * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. 633 * @throws IOException if an I/O error occurs. 634 * @since 1.21 635 */ 636 protected static Map<String, String> parsePaxHeaders(final InputStream inputStream, final List<TarArchiveStructSparse> sparseHeaders, 637 final Map<String, String> globalPaxHeaders, final long headerSize) throws IOException { 638 final Map<String, String> headers = new HashMap<>(globalPaxHeaders); 639 Long offset = null; 640 // Format is "length keyword=value\n"; 641 int totalRead = 0; 642 while (true) { // get length 643 int ch; 644 int len = 0; 645 int read = 0; 646 while ((ch = inputStream.read()) != -1) { 647 read++; 648 totalRead++; 649 if (ch == '\n') { // blank line in header 650 break; 651 } 652 if (ch == ' ') { // End of length string 653 // Get keyword 654 final ByteArrayOutputStream coll = new ByteArrayOutputStream(); 655 while ((ch = inputStream.read()) != -1) { 656 read++; 657 totalRead++; 658 if (totalRead < 0 || headerSize >= 0 && totalRead >= headerSize) { 659 break; 660 } 661 if (ch == '=') { // end of keyword 662 final String keyword = coll.toString(StandardCharsets.UTF_8); 663 // Get rest of entry 664 final int restLen = len - read; 665 if (restLen <= 1) { // only NL 666 headers.remove(keyword); 667 } else if (headerSize >= 0 && restLen > headerSize - totalRead) { 668 throw new IOException("Paxheader value size " + restLen + " exceeds size of header record"); 669 } else { 670 final byte[] rest = IOUtils.readRange(inputStream, restLen); 671 final int got = rest.length; 672 if (got != restLen) { 673 throw new IOException("Failed to read Paxheader. Expected " + restLen + " bytes, read " + got); 674 } 675 totalRead += restLen; 676 // Drop trailing NL 677 if (rest[restLen - 1] != '\n') { 678 throw new IOException("Failed to read Paxheader.Value should end with a newline"); 679 } 680 final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); 681 headers.put(keyword, value); 682 683 // for 0.0 PAX Headers 684 if (keyword.equals(TarGnuSparseKeys.OFFSET)) { 685 if (offset != null) { 686 // previous GNU.sparse.offset header but no numBytes 687 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 688 } 689 try { 690 offset = Long.valueOf(value); 691 } catch (final NumberFormatException ex) { 692 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); 693 } 694 if (offset < 0) { 695 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value"); 696 } 697 } 698 699 // for 0.0 PAX Headers 700 if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { 701 if (offset == null) { 702 throw new IOException( 703 "Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); 704 } 705 final long numbytes = ParsingUtils.parseLongValue(value); 706 if (numbytes < 0) { 707 throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value"); 708 } 709 sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); 710 offset = null; 711 } 712 } 713 break; 714 } 715 coll.write((byte) ch); 716 } 717 break; // Processed single header 718 } 719 // COMPRESS-530 : throw if we encounter a non-number while reading length 720 if (ch < '0' || ch > '9') { 721 throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); 722 } 723 len *= 10; 724 len += ch - '0'; 725 } 726 if (ch == -1) { // EOF 727 break; 728 } 729 } 730 if (offset != null) { 731 // offset but no numBytes 732 sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); 733 } 734 return headers; 735 } 736 737 /** 738 * Parses the content of a PAX 1.0 sparse block. 739 * 740 * @param buffer The buffer from which to parse. 741 * @param offset The offset into the buffer from which to parse. 742 * @return a parsed sparse struct 743 * @since 1.20 744 */ 745 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 746 final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); 747 final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); 748 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 749 } 750 751 /** 752 * For 1.x PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. It consists of a series of decimal numbers 753 * delimited by newlines. 754 * 755 * @param inputStream the input stream of the tar file 756 * @return the decimal number delimited by '\n', and the bytes read from input stream 757 * @throws IOException if an I/O error occurs. 758 */ 759 private static long[] readLineOfNumberForPax1x(final InputStream inputStream) throws IOException { 760 int number; 761 long result = 0; 762 long bytesRead = 0; 763 while ((number = inputStream.read()) != '\n') { 764 bytesRead += 1; 765 if (number == -1) { 766 throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); 767 } 768 if (number < '0' || number > '9') { 769 throw new IOException("Corrupted TAR archive. Non-numeric value in sparse headers block"); 770 } 771 result = result * 10 + (number - '0'); 772 } 773 bytesRead += 1; 774 return new long[] { result, bytesRead }; 775 } 776 777 /** 778 * @since 1.21 779 */ 780 static List<TarArchiveStructSparse> readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException { 781 final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>(); 782 for (int i = 0; i < entries; i++) { 783 try { 784 final TarArchiveStructSparse sparseHeader = parseSparse(buffer, 785 offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); 786 if (sparseHeader.getOffset() < 0) { 787 throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); 788 } 789 if (sparseHeader.getNumbytes() < 0) { 790 throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); 791 } 792 sparseHeaders.add(sparseHeader); 793 } catch (final IllegalArgumentException ex) { 794 // thrown internally by parseOctalOrBinary 795 throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); 796 } 797 } 798 return Collections.unmodifiableList(sparseHeaders); 799 } 800 801 /** 802 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(computing)#File_header">says</a>: <blockquote> The checksum is calculated by taking the sum of the 803 * unsigned byte values of the header block with the eight checksum bytes taken to be ASCII spaces (decimal value 32). It is stored as a six digit octal 804 * number with leading zeroes followed by a NUL and then a space. Various implementations do not adhere to this format. For better compatibility, ignore 805 * leading and trailing whitespace, and get the first six digits. In addition, some historic tar implementations treated bytes as signed. Implementations 806 * typically calculate the checksum both ways, and treat it as good if either the signed or unsigned sum matches the included checksum. </blockquote> 807 * <p> 808 * The return value of this method should be treated as a best-effort heuristic rather than an absolute and final truth. The checksum verification logic may 809 * well evolve over time as more special cases are encountered. 810 * </p> 811 * 812 * @param header tar header 813 * @return whether the checksum is reasonably good 814 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 815 * @since 1.5 816 */ 817 public static boolean verifyCheckSum(final byte[] header) { 818 final long storedSum = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN); 819 long unsignedSum = 0; 820 long signedSum = 0; 821 for (int i = 0; i < header.length; i++) { 822 byte b = header[i]; 823 if (TarConstants.CHKSUM_OFFSET <= i && i < TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN) { 824 b = ' '; 825 } 826 unsignedSum += 0xff & b; 827 signedSum += b; 828 } 829 return storedSum == unsignedSum || storedSum == signedSum; 830 } 831 832 /** Prevents instantiation. */ 833 private TarUtils() { 834 } 835 836}