001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.archivers.sevenz; 018 019import static java.nio.charset.StandardCharsets.UTF_16LE; 020 021import java.io.BufferedInputStream; 022import java.io.ByteArrayInputStream; 023import java.io.Closeable; 024import java.io.DataInputStream; 025import java.io.EOFException; 026import java.io.File; 027import java.io.FilterInputStream; 028import java.io.IOException; 029import java.io.InputStream; 030import java.nio.ByteBuffer; 031import java.nio.ByteOrder; 032import java.nio.channels.Channels; 033import java.nio.channels.SeekableByteChannel; 034import java.nio.file.Files; 035import java.nio.file.OpenOption; 036import java.nio.file.Path; 037import java.nio.file.StandardOpenOption; 038import java.util.ArrayList; 039import java.util.Arrays; 040import java.util.BitSet; 041import java.util.EnumSet; 042import java.util.LinkedHashMap; 043import java.util.LinkedList; 044import java.util.List; 045import java.util.Map; 046import java.util.Objects; 047import java.util.zip.CRC32; 048import java.util.zip.CheckedInputStream; 049 050import org.apache.commons.compress.MemoryLimitException; 051import 
org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * Reads a 7z file, using SeekableByteChannel under the covers.
 * <p>
 * The 7z file format is a flexible container that can contain many compression and encryption types, but at the moment only Copy, LZMA, LZMA2, BZIP2,
 * Deflate and AES-256 + SHA-256 are supported.
 * </p>
 * <p>
 * The format is very Windows/Intel specific, so it uses little-endian byte order, doesn't store user/group or permission bits, and represents times using NTFS
 * timestamps (100 nanosecond units since 1 January 1601). Hence the official tools recommend against using it for backup purposes on *nix, and recommend
 * .tar.7z or .tar.lzma or .tar.xz instead.
 * </p>
 * <p>
 * Both the header and file contents may be compressed and/or encrypted. With both encrypted, neither file names nor file contents can be read, but the use of
 * encryption isn't plausibly deniable.
 * </p>
 * <p>
 * Multi volume archives can be read by concatenating the parts in correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} for example.
078 * </p> 079 * 080 * @NotThreadSafe 081 * @since 1.6 082 */ 083public class SevenZFile implements Closeable { 084 085 private static final class ArchiveStatistics { 086 private int numberOfPackedStreams; 087 private long numberOfCoders; 088 private long numberOfOutStreams; 089 private long numberOfInStreams; 090 private long numberOfUnpackSubStreams; 091 private int numberOfFolders; 092 private BitSet folderHasCrc; 093 private int numberOfEntries; 094 private int numberOfEntriesWithStream; 095 096 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 097 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 098 throw new IOException("archive with entries but no folders"); 099 } 100 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 101 throw new IOException("archive doesn't contain enough substreams for entries"); 102 } 103 104 final long memoryNeededInKb = estimateSize() / 1024; 105 if (maxMemoryLimitInKb < memoryNeededInKb) { 106 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 107 } 108 } 109 110 private long bindPairSize() { 111 return 16; 112 } 113 114 private long coderSize() { 115 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 116 + 16 + 4 /* properties, guess */ 117 ; 118 } 119 120 private long entrySize() { 121 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 122 } 123 124 long estimateSize() { 125 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 126 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 127 + numberOfFolders * folderSize() /* folders in Archive */ 128 + numberOfCoders * coderSize() /* coders in Folder */ 129 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 130 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 131 + 8L * numberOfOutStreams /* unpackSizes in 
Folder */ 132 + numberOfEntries * entrySize() /* files in Archive */ 133 + streamMapSize(); 134 return 2 * lowerBound /* conservative guess */; 135 } 136 137 private long folderSize() { 138 return 30; /* nested arrays are accounted for separately */ 139 } 140 141 private long streamMapSize() { 142 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 143 + 8 * numberOfPackedStreams /* packStreamOffsets */ 144 + 4 * numberOfEntries /* fileFolderIndex */ 145 ; 146 } 147 148 @Override 149 public String toString() { 150 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders + " folders. Estimated size " + estimateSize() / 1024L + " kB."; 151 } 152 } 153 154 /** 155 * Builds new instances of {@link SevenZFile}. 156 * 157 * @since 1.26.0 158 */ 159 public static class Builder extends AbstractStreamBuilder<SevenZFile, Builder> { 160 161 static final int MEMORY_LIMIT_IN_KB = Integer.MAX_VALUE; 162 static final boolean USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES = false; 163 static final boolean TRY_TO_RECOVER_BROKEN_ARCHIVES = false; 164 165 private SeekableByteChannel seekableByteChannel; 166 private String defaultName = DEFAULT_FILE_NAME; 167 private byte[] password; 168 private int maxMemoryLimitKb = MEMORY_LIMIT_IN_KB; 169 private boolean useDefaultNameForUnnamedEntries = USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES; 170 private boolean tryToRecoverBrokenArchives = TRY_TO_RECOVER_BROKEN_ARCHIVES; 171 172 @SuppressWarnings("resource") // Caller closes 173 @Override 174 public SevenZFile get() throws IOException { 175 final SeekableByteChannel actualChannel; 176 final String actualDescription; 177 if (seekableByteChannel != null) { 178 actualChannel = seekableByteChannel; 179 actualDescription = defaultName; 180 } else if (checkOrigin() instanceof ByteArrayOrigin) { 181 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray()); 182 actualDescription = defaultName; 183 } else { 184 OpenOption[] openOptions = 
getOpenOptions(); 185 if (openOptions.length == 0) { 186 openOptions = new OpenOption[] { StandardOpenOption.READ }; 187 } 188 final Path path = getPath(); 189 actualChannel = Files.newByteChannel(path, openOptions); 190 actualDescription = path.toAbsolutePath().toString(); 191 } 192 final boolean closeOnError = seekableByteChannel != null; 193 return new SevenZFile(actualChannel, actualDescription, password, closeOnError, maxMemoryLimitKb, useDefaultNameForUnnamedEntries, 194 tryToRecoverBrokenArchives); 195 } 196 197 /** 198 * Sets the default name. 199 * 200 * @param defaultName the default name. 201 * @return this. 202 */ 203 public Builder setDefaultName(final String defaultName) { 204 this.defaultName = defaultName; 205 return this; 206 } 207 208 /** 209 * Sets the maximum amount of memory in kilobytes to use for parsing the archive and during extraction. 210 * <p> 211 * Not all codecs honor this setting. Currently only LZMA and LZMA2 are supported. 212 * </p> 213 * 214 * @param maxMemoryLimitKb the max memory limit in kilobytes. 215 * @return this. 216 */ 217 public Builder setMaxMemoryLimitKb(final int maxMemoryLimitKb) { 218 this.maxMemoryLimitKb = maxMemoryLimitKb; 219 return this; 220 } 221 222 /** 223 * Sets the password. 224 * 225 * @param password the password. 226 * @return this. 227 */ 228 public Builder setPassword(final byte[] password) { 229 this.password = password != null ? password.clone() : null; 230 return this; 231 } 232 233 /** 234 * Sets the password. 235 * 236 * @param password the password. 237 * @return this. 238 */ 239 public Builder setPassword(final char[] password) { 240 this.password = password != null ? AES256SHA256Decoder.utf16Decode(password.clone()) : null; 241 return this; 242 } 243 244 /** 245 * Sets the password. 246 * 247 * @param password the password. 248 * @return this. 249 */ 250 public Builder setPassword(final String password) { 251 this.password = password != null ? 
AES256SHA256Decoder.utf16Decode(password.toCharArray()) : null; 252 return this; 253 } 254 255 /** 256 * Sets the input channel. 257 * 258 * @param seekableByteChannel the input channel. 259 * @return this. 260 */ 261 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 262 this.seekableByteChannel = seekableByteChannel; 263 return this; 264 } 265 266 /** 267 * Sets whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 268 * <p> 269 * This special kind of broken archive is encountered when mutli volume archives are closed prematurely. If you enable this option SevenZFile will trust 270 * data that looks as if it could contain metadata of an archive and allocate big amounts of memory. It is strongly recommended to not enable this 271 * option without setting {@link #setMaxMemoryLimitKb(int)} at the same time. 272 * </p> 273 * 274 * @param tryToRecoverBrokenArchives whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 275 * @return this. 276 */ 277 public Builder setTryToRecoverBrokenArchives(final boolean tryToRecoverBrokenArchives) { 278 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 279 return this; 280 } 281 282 /** 283 * Sets whether entries without a name should get their names set to the archive's default file name. 284 * 285 * @param useDefaultNameForUnnamedEntries whether entries without a name should get their names set to the archive's default file name. 286 * @return this. 287 */ 288 public Builder setUseDefaultNameForUnnamedEntries(final boolean useDefaultNameForUnnamedEntries) { 289 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 290 return this; 291 } 292 293 } 294 295 static final int SIGNATURE_HEADER_SIZE = 32; 296 297 private static final String DEFAULT_FILE_NAME = "unknown archive"; 298 299 /** Shared with SevenZOutputFile and tests, neither mutates it. 
*/ 300 static final byte[] sevenZSignature = { // NOSONAR 301 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C }; 302 303 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 304 if (value > Integer.MAX_VALUE || value < 0) { 305 throw new IOException(String.format("Cannot handle % %,d", what, value)); 306 } 307 return (int) value; 308 } 309 310 /** 311 * Creates a new Builder. 312 * 313 * @return a new Builder. 314 * @since 1.26.0 315 */ 316 public static Builder builder() { 317 return new Builder(); 318 } 319 320 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 321 final int remaining = buf.remaining(); 322 if (remaining < expectRemaining) { 323 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 324 } 325 return buf; 326 } 327 328 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 329 checkEndOfFile(buf, to.length).get(to); 330 } 331 332 private static char getChar(final ByteBuffer buf) throws EOFException { 333 return checkEndOfFile(buf, Character.BYTES).getChar(); 334 } 335 336 private static int getInt(final ByteBuffer buf) throws EOFException { 337 return checkEndOfFile(buf, Integer.BYTES).getInt(); 338 } 339 340 private static long getLong(final ByteBuffer buf) throws EOFException { 341 return checkEndOfFile(buf, Long.BYTES).getLong(); 342 } 343 344 private static int getUnsignedByte(final ByteBuffer buf) throws EOFException { 345 if (!buf.hasRemaining()) { 346 throw new EOFException(); 347 } 348 return buf.get() & 0xff; 349 } 350 351 /** 352 * Checks if the signature matches what is expected for a 7z file. 353 * 354 * @param signature the bytes to check 355 * @param length the number of bytes to check 356 * @return true, if this is the signature of a 7z archive. 
357 * @since 1.8 358 */ 359 public static boolean matches(final byte[] signature, final int length) { 360 if (length < sevenZSignature.length) { 361 return false; 362 } 363 for (int i = 0; i < sevenZSignature.length; i++) { 364 if (signature[i] != sevenZSignature[i]) { 365 return false; 366 } 367 } 368 return true; 369 } 370 371 private static SeekableByteChannel newByteChannel(final File file) throws IOException { 372 return Files.newByteChannel(file.toPath(), EnumSet.of(StandardOpenOption.READ)); 373 } 374 375 private static long readUint64(final ByteBuffer in) throws IOException { 376 // long rather than int as it might get shifted beyond the range of an int 377 final long firstByte = getUnsignedByte(in); 378 int mask = 0x80; 379 long value = 0; 380 for (int i = 0; i < 8; i++) { 381 if ((firstByte & mask) == 0) { 382 return value | (firstByte & mask - 1) << 8 * i; 383 } 384 final long nextByte = getUnsignedByte(in); 385 value |= nextByte << 8 * i; 386 mask >>>= 1; 387 } 388 return value; 389 } 390 391 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 392 if (bytesToSkip < 1) { 393 return 0; 394 } 395 final int current = input.position(); 396 final int maxSkip = input.remaining(); 397 if (maxSkip < bytesToSkip) { 398 bytesToSkip = maxSkip; 399 } 400 input.position(current + (int) bytesToSkip); 401 return bytesToSkip; 402 } 403 404 private final String fileName; 405 private SeekableByteChannel channel; 406 private final Archive archive; 407 private int currentEntryIndex = -1; 408 private int currentFolderIndex = -1; 409 private InputStream currentFolderInputStream; 410 private byte[] password; 411 private long compressedBytesReadFromCurrentEntry; 412 private long uncompressedBytesReadFromCurrentEntry; 413 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 414 private final int maxMemoryLimitKb; 415 private final boolean useDefaultNameForUnnamedEntries; 416 417 private final boolean 
tryToRecoverBrokenArchives;

    /**
     * Reads a file as unencrypted 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     *
     * @param fileName the file to read.
     * @throws IOException if reading the archive fails.
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final File fileName) throws IOException {
        this(fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     *
     * @param file     the file to read.
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails.
     * @deprecated Use {@link Builder#get()}.
     */
    @SuppressWarnings("resource") // caller closes
    @Deprecated
    public SevenZFile(final File file, final byte[] password) throws IOException {
        this(newByteChannel(file), file.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     *
     * @param file     the file to read.
     * @param password optional password if the archive is encrypted.
     * @throws IOException if reading the archive fails.
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final File file, final char[] password) throws IOException {
        this(file, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a file as 7z archive with additional options.
     *
     * @param file     the file to read.
     * @param password optional password if the archive is encrypted.
     * @param options  the options to apply.
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small.
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @SuppressWarnings("resource") // caller closes
    @Deprecated
    public SevenZFile(final File file, final char[] password, final SevenZFileOptions options) throws IOException {
        this(newByteChannel(file), // NOSONAR
                file.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options);
    }

    /**
     * Reads a file as unencrypted 7z archive with additional options.
     *
     * @param file    the file to read.
     * @param options the options to apply.
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small.
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final File file, final SevenZFileOptions options) throws IOException {
        this(file, null, options);
    }

    /**
     * Reads a SeekableByteChannel as unencrypted 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read.
     * @throws IOException if reading the archive fails.
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read.
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails.
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read.
     * @param password optional password if the archive is encrypted.
     * @throws IOException if reading the archive fails.
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read.
     * @param password optional password if the archive is encrypted.
     * @param options  the options to apply.
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small.
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as unencrypted 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read.
     * @param options the options to apply.
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small.
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as unencrypted 7z archive, using {@link SevenZFileOptions#DEFAULT}.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read.
     * @param fileName name of the archive - only used for error reporting.
     * @throws IOException if reading the archive fails.
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName) throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
604 */ 605 @Deprecated 606 public SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password) throws IOException { 607 this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); 608 } 609 610 private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, final int maxMemoryLimitKb, 611 final boolean useDefaultNameForUnnamedEntries, final boolean tryToRecoverBrokenArchives) throws IOException { 612 boolean succeeded = false; 613 this.channel = channel; 614 this.fileName = fileName; 615 this.maxMemoryLimitKb = maxMemoryLimitKb; 616 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 617 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 618 try { 619 archive = readHeaders(password); 620 if (password != null) { 621 this.password = Arrays.copyOf(password, password.length); 622 } else { 623 this.password = null; 624 } 625 succeeded = true; 626 } finally { 627 if (!succeeded && closeOnError) { 628 this.channel.close(); 629 } 630 } 631 } 632 633 /** 634 * Constructs a new instance. 635 * 636 * @param channel the channel to read. 637 * @param fileName name of the archive - only used for error reporting. 638 * @param password optional password if the archive is encrypted. 639 * @param closeOnError closes the channel on error. 640 * @param options options. 641 * @throws IOException if reading the archive fails 642 * @deprecated Use {@link Builder#get()}. 
643 */ 644 @Deprecated 645 private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, 646 final SevenZFileOptions options) throws IOException { 647 this(channel, fileName, password, closeOnError, options.getMaxMemoryLimitInKb(), options.getUseDefaultNameForUnnamedEntries(), 648 options.getTryToRecoverBrokenArchives()); 649 } 650 651 /** 652 * Reads a SeekableByteChannel as 7z archive 653 * <p> 654 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 655 * </p> 656 * 657 * @param channel the channel to read 658 * @param fileName name of the archive - only used for error reporting 659 * @param password optional password if the archive is encrypted 660 * @throws IOException if reading the archive fails 661 * @since 1.17 662 * @deprecated Use {@link Builder#get()}. 663 */ 664 @Deprecated 665 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password) throws IOException { 666 this(channel, fileName, password, SevenZFileOptions.DEFAULT); 667 } 668 669 /** 670 * Reads a SeekableByteChannel as 7z archive with additional options. 671 * <p> 672 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 673 * </p> 674 * 675 * @param channel the channel to read 676 * @param fileName name of the archive - only used for error reporting 677 * @param password optional password if the archive is encrypted 678 * @param options the options to apply 679 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 680 * @since 1.19 681 * @deprecated Use {@link Builder#get()}. 
682 */ 683 @Deprecated 684 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, final SevenZFileOptions options) throws IOException { 685 this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options); 686 } 687 688 /** 689 * Reads a SeekableByteChannel as 7z archive with additional options. 690 * <p> 691 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 692 * </p> 693 * 694 * @param channel the channel to read 695 * @param fileName name of the archive - only used for error reporting 696 * @param options the options to apply 697 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 698 * @since 1.19 699 * @deprecated Use {@link Builder#get()}. 700 */ 701 @Deprecated 702 public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) throws IOException { 703 this(channel, fileName, null, false, options); 704 } 705 706 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry) 707 throws IOException { 708 channel.position(folderOffset); 709 InputStream inputStreamStack = new FilterInputStream( 710 new BufferedInputStream(new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]))) { 711 private void count(final int c) { 712 compressedBytesReadFromCurrentEntry += c; 713 } 714 715 @Override 716 public int read() throws IOException { 717 final int r = in.read(); 718 if (r >= 0) { 719 count(1); 720 } 721 return r; 722 } 723 724 @Override 725 public int read(final byte[] b) throws IOException { 726 return read(b, 0, b.length); 727 } 728 729 @Override 730 public int read(final byte[] b, final int off, final int len) throws IOException { 731 if (len == 0) { 732 return 0; 733 } 734 final int r = in.read(b, off, len); 735 if (r >= 0) { 736 
count(r); 737 } 738 return r; 739 } 740 }; 741 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 742 for (final Coder coder : folder.getOrderedCoders()) { 743 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 744 throw new IOException("Multi input/output stream coders are not yet supported"); 745 } 746 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 747 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 748 methods.addFirst(new SevenZMethodConfiguration(method, Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 749 } 750 entry.setContentMethods(methods); 751 if (folder.hasCrc) { 752 return new CRC32VerifyingInputStream(inputStreamStack, folder.getUnpackSize(), folder.crc); 753 } 754 return inputStreamStack; 755 } 756 757 /** 758 * Builds the decoding stream for the entry to be read. This method may be called from a random access(getInputStream) or sequential access(getNextEntry). 759 * If this method is called from a random access, some entries may need to be skipped(we put them to the deferredBlockStreams and skip them when actually 760 * needed to improve the performance) 761 * 762 * @param entryIndex the index of the entry to be read 763 * @param isRandomAccess is this called in a random access 764 * @throws IOException if there are exceptions when reading the file 765 */ 766 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 767 if (archive.streamMap == null) { 768 throw new IOException("Archive doesn't contain stream information to read entries"); 769 } 770 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 771 if (folderIndex < 0) { 772 deferredBlockStreams.clear(); 773 // TODO: previously it'd return an empty stream? 
774 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 775 return; 776 } 777 final SevenZArchiveEntry file = archive.files[entryIndex]; 778 boolean isInSameFolder = false; 779 if (currentFolderIndex == folderIndex) { 780 // (COMPRESS-320). 781 // The current entry is within the same (potentially opened) folder. The 782 // previous stream has to be fully decoded before we can start reading 783 // but don't do it eagerly -- if the user skips over the entire folder nothing 784 // is effectively decompressed. 785 if (entryIndex > 0) { 786 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 787 } 788 789 // if this is called in a random access, then the content methods of previous entry may be null 790 // the content methods should be set to methods of the first entry as it must not be null, 791 // and the content methods would only be set if the content methods was not set 792 if (isRandomAccess && file.getContentMethods() == null) { 793 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 794 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 795 file.setContentMethods(folderFirstFile.getContentMethods()); 796 } 797 isInSameFolder = true; 798 } else { 799 currentFolderIndex = folderIndex; 800 // We're opening a new folder. Discard any queued streams/ folder stream. 801 reopenFolderInputStream(folderIndex, file); 802 } 803 804 boolean haveSkippedEntries = false; 805 if (isRandomAccess) { 806 // entries will only need to be skipped if it's a random access 807 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 808 } 809 810 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 811 // we don't need to add another entry to the deferredBlockStreams when : 812 // 1. 
If this method is called in a random access and the entry index 813 // to be read equals to the current entry index, the input stream 814 // has already been put in the deferredBlockStreams 815 // 2. If this entry has not been read(which means no entries are skipped) 816 return; 817 } 818 819 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 820 if (file.getHasCrc()) { 821 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 822 } 823 824 deferredBlockStreams.add(fileStream); 825 } 826 827 private void calculateStreamMap(final Archive archive) throws IOException { 828 int nextFolderPackStreamIndex = 0; 829 final int numFolders = archive.folders != null ? archive.folders.length : 0; 830 final int[] folderFirstPackStreamIndex = new int[numFolders]; 831 for (int i = 0; i < numFolders; i++) { 832 folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 833 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 834 } 835 long nextPackStreamOffset = 0; 836 final int numPackSizes = archive.packSizes.length; 837 final long[] packStreamOffsets = new long[numPackSizes]; 838 for (int i = 0; i < numPackSizes; i++) { 839 packStreamOffsets[i] = nextPackStreamOffset; 840 nextPackStreamOffset += archive.packSizes[i]; 841 } 842 final int[] folderFirstFileIndex = new int[numFolders]; 843 final int[] fileFolderIndex = new int[archive.files.length]; 844 int nextFolderIndex = 0; 845 int nextFolderUnpackStreamIndex = 0; 846 for (int i = 0; i < archive.files.length; i++) { 847 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 848 fileFolderIndex[i] = -1; 849 continue; 850 } 851 if (nextFolderUnpackStreamIndex == 0) { 852 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 853 folderFirstFileIndex[nextFolderIndex] = i; 854 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 855 break; 856 } 857 } 858 if (nextFolderIndex >= 
archive.folders.length) { 859 throw new IOException("Too few folders in archive"); 860 } 861 } 862 fileFolderIndex[i] = nextFolderIndex; 863 if (!archive.files[i].hasStream()) { 864 continue; 865 } 866 ++nextFolderUnpackStreamIndex; 867 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 868 ++nextFolderIndex; 869 nextFolderUnpackStreamIndex = 0; 870 } 871 } 872 archive.streamMap = new StreamMap(folderFirstPackStreamIndex, packStreamOffsets, folderFirstFileIndex, fileFolderIndex); 873 } 874 875 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 876 archiveEntries.computeIfAbsent(index, i -> new SevenZArchiveEntry()); 877 } 878 879 /** 880 * Closes the archive. 881 * 882 * @throws IOException if closing the file fails 883 */ 884 @Override 885 public void close() throws IOException { 886 if (channel != null) { 887 try { 888 channel.close(); 889 } finally { 890 channel = null; 891 if (password != null) { 892 Arrays.fill(password, (byte) 0); 893 } 894 password = null; 895 } 896 } 897 } 898 899 private InputStream getCurrentStream() throws IOException { 900 if (archive.files[currentEntryIndex].getSize() == 0) { 901 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 902 } 903 if (deferredBlockStreams.isEmpty()) { 904 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 905 } 906 907 while (deferredBlockStreams.size() > 1) { 908 // In solid compression mode we need to decompress all leading folder' 909 // streams to get access to an entry. We defer this until really needed 910 // so that entire blocks can be skipped without wasting time for decompression. 
911 try (InputStream stream = deferredBlockStreams.remove(0)) { 912 org.apache.commons.io.IOUtils.skip(stream, Long.MAX_VALUE); 913 } 914 compressedBytesReadFromCurrentEntry = 0; 915 } 916 917 return deferredBlockStreams.get(0); 918 } 919 920 /** 921 * Gets a default file name from the archive name - if known. 922 * <p> 923 * This implements the same heuristics the 7z tools use. In 7z's case if an archive contains entries without a name - i.e. 924 * {@link SevenZArchiveEntry#getName} returns {@code null} - then its command line and GUI tools will use this default name when extracting the entries. 925 * </p> 926 * 927 * @return null if the name of the archive is unknown. Otherwise, if the name of the archive has got any extension, it is stripped and the remainder 928 * returned. Finally, if the name of the archive hasn't got any extension, then a {@code ~} character is appended to the archive name. 929 * @since 1.19 930 */ 931 public String getDefaultName() { 932 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 933 return null; 934 } 935 936 final String lastSegment = new File(fileName).getName(); 937 final int dotPos = lastSegment.lastIndexOf("."); 938 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 939 return lastSegment.substring(0, dotPos); 940 } 941 return lastSegment + "~"; 942 } 943 944 /** 945 * Gets a copy of meta-data of all archive entries. 946 * <p> 947 * This method only provides meta-data, the entries can not be used to read the contents, you still need to process all entries in order using 948 * {@link #getNextEntry} for that. 949 * </p> 950 * <p> 951 * The content methods are only available for entries that have already been reached via {@link #getNextEntry}. 952 * </p> 953 * 954 * @return a copy of meta-data of all archive entries. 
955 * @since 1.11 956 */ 957 public Iterable<SevenZArchiveEntry> getEntries() { 958 return new ArrayList<>(Arrays.asList(archive.files)); 959 } 960 961 /** 962 * Gets an InputStream for reading the contents of the given entry. 963 * <p> 964 * For archives using solid compression randomly accessing entries will be significantly slower than reading the archive sequentially. 965 * </p> 966 * 967 * @param entry the entry to get the stream for. 968 * @return a stream to read the entry from. 969 * @throws IOException if unable to create an input stream from the entry 970 * @since 1.20 971 */ 972 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 973 int entryIndex = -1; 974 for (int i = 0; i < this.archive.files.length; i++) { 975 if (entry == this.archive.files[i]) { 976 entryIndex = i; 977 break; 978 } 979 } 980 981 if (entryIndex < 0) { 982 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 983 } 984 985 buildDecodingStream(entryIndex, true); 986 currentEntryIndex = entryIndex; 987 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 988 return getCurrentStream(); 989 } 990 991 /** 992 * Gets the next Archive Entry in this archive. 993 * 994 * @return the next entry, or {@code null} if there are no more entries 995 * @throws IOException if the next entry could not be read 996 */ 997 public SevenZArchiveEntry getNextEntry() throws IOException { 998 if (currentEntryIndex >= archive.files.length - 1) { 999 return null; 1000 } 1001 ++currentEntryIndex; 1002 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 1003 if (entry.getName() == null && useDefaultNameForUnnamedEntries) { 1004 entry.setName(getDefaultName()); 1005 } 1006 buildDecodingStream(currentEntryIndex, false); 1007 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 1008 return entry; 1009 } 1010 1011 /** 1012 * Gets statistics for bytes read from the current entry. 
1013 * 1014 * @return statistics for bytes read from the current entry 1015 * @since 1.17 1016 */ 1017 public InputStreamStatistics getStatisticsForCurrentEntry() { 1018 return new InputStreamStatistics() { 1019 @Override 1020 public long getCompressedCount() { 1021 return compressedBytesReadFromCurrentEntry; 1022 } 1023 1024 @Override 1025 public long getUncompressedCount() { 1026 return uncompressedBytesReadFromCurrentEntry; 1027 } 1028 }; 1029 } 1030 1031 /** 1032 * Tests if any data of current entry has been read or not. This is achieved by comparing the bytes remaining to read and the size of the file. 1033 * 1034 * @return true if any data of current entry has been read 1035 * @since 1.21 1036 */ 1037 private boolean hasCurrentEntryBeenRead() { 1038 boolean hasCurrentEntryBeenRead = false; 1039 if (!deferredBlockStreams.isEmpty()) { 1040 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1041 // get the bytes remaining to read, and compare it with the size of 1042 // the file to figure out if the file has been read 1043 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1044 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex] 1045 .getSize(); 1046 } 1047 1048 if (currentEntryInputStream instanceof BoundedInputStream) { 1049 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1050 } 1051 } 1052 return hasCurrentEntryBeenRead; 1053 } 1054 1055 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 1056 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 1057 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 1058 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 1059 if (verifyCrc) { 1060 
final long position = channel.position(); 1061 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 1062 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 1063 throw new IOException("Problem computing NextHeader CRC-32"); 1064 } 1065 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 1066 throw new IOException("NextHeader CRC-32 mismatch"); 1067 } 1068 channel.position(position); 1069 } 1070 Archive archive = new Archive(); 1071 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 1072 readFully(buf); 1073 int nid = getUnsignedByte(buf); 1074 if (nid == NID.kEncodedHeader) { 1075 buf = readEncodedHeader(buf, archive, password); 1076 // Archive gets rebuilt with the new header 1077 archive = new Archive(); 1078 nid = getUnsignedByte(buf); 1079 } 1080 if (nid != NID.kHeader) { 1081 throw new IOException("Broken or unsupported archive: no Header"); 1082 } 1083 readHeader(buf, archive); 1084 archive.subStreamsInfo = null; 1085 return archive; 1086 } 1087 1088 /** 1089 * Reads a byte of data. 1090 * 1091 * @return the byte read, or -1 if end of input is reached 1092 * @throws IOException if an I/O error has occurred 1093 */ 1094 public int read() throws IOException { 1095 final int b = getCurrentStream().read(); 1096 if (b >= 0) { 1097 uncompressedBytesReadFromCurrentEntry++; 1098 } 1099 return b; 1100 } 1101 1102 /** 1103 * Reads data into an array of bytes. 1104 * 1105 * @param b the array to write data to 1106 * @return the number of bytes read, or -1 if end of input is reached 1107 * @throws IOException if an I/O error has occurred 1108 */ 1109 public int read(final byte[] b) throws IOException { 1110 return read(b, 0, b.length); 1111 } 1112 1113 /** 1114 * Reads data into an array of bytes. 
1115 * 1116 * @param b the array to write data to 1117 * @param off offset into the buffer to start filling at 1118 * @param len of bytes to read 1119 * @return the number of bytes read, or -1 if end of input is reached 1120 * @throws IOException if an I/O error has occurred 1121 */ 1122 public int read(final byte[] b, final int off, final int len) throws IOException { 1123 if (len == 0) { 1124 return 0; 1125 } 1126 final int cnt = getCurrentStream().read(b, off, len); 1127 if (cnt > 0) { 1128 uncompressedBytesReadFromCurrentEntry += cnt; 1129 } 1130 return cnt; 1131 } 1132 1133 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1134 final int areAllDefined = getUnsignedByte(header); 1135 final BitSet bits; 1136 if (areAllDefined != 0) { 1137 bits = new BitSet(size); 1138 for (int i = 0; i < size; i++) { 1139 bits.set(i, true); 1140 } 1141 } else { 1142 bits = readBits(header, size); 1143 } 1144 return bits; 1145 } 1146 1147 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1148 // FIXME: the reference implementation just throws them away? 
1149 int nid = getUnsignedByte(input); 1150 while (nid != NID.kEnd) { 1151 final long propertySize = readUint64(input); 1152 final byte[] property = new byte[(int) propertySize]; 1153 get(input, property); 1154 nid = getUnsignedByte(input); 1155 } 1156 } 1157 1158 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1159 final BitSet bits = new BitSet(size); 1160 int mask = 0; 1161 int cache = 0; 1162 for (int i = 0; i < size; i++) { 1163 if (mask == 0) { 1164 mask = 0x80; 1165 cache = getUnsignedByte(header); 1166 } 1167 bits.set(i, (cache & mask) != 0); 1168 mask >>>= 1; 1169 } 1170 return bits; 1171 } 1172 1173 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, final byte[] password) throws IOException { 1174 final int pos = header.position(); 1175 final ArchiveStatistics stats = new ArchiveStatistics(); 1176 sanityCheckStreamsInfo(header, stats); 1177 stats.assertValidity(maxMemoryLimitKb); 1178 header.position(pos); 1179 1180 readStreamsInfo(header, archive); 1181 1182 if (archive.folders == null || archive.folders.length == 0) { 1183 throw new IOException("no folders, can't read encoded header"); 1184 } 1185 if (archive.packSizes == null || archive.packSizes.length == 0) { 1186 throw new IOException("no packed streams, can't read encoded header"); 1187 } 1188 1189 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
1190 final Folder folder = archive.folders[0]; 1191 final int firstPackStreamIndex = 0; 1192 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0; 1193 1194 channel.position(folderOffset); 1195 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]); 1196 for (final Coder coder : folder.getOrderedCoders()) { 1197 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1198 throw new IOException("Multi input/output stream coders are not yet supported"); 1199 } 1200 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, // NOSONAR 1201 folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 1202 } 1203 if (folder.hasCrc) { 1204 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, folder.getUnpackSize(), folder.crc); 1205 } 1206 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1207 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1208 if (nextHeader.length < unpackSize) { 1209 throw new IOException("premature end of stream"); 1210 } 1211 inputStreamStack.close(); 1212 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1213 } 1214 1215 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1216 final int numFilesInt = (int) readUint64(header); 1217 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1218 BitSet isEmptyStream = null; 1219 BitSet isEmptyFile = null; 1220 BitSet isAnti = null; 1221 while (true) { 1222 final int propertyType = getUnsignedByte(header); 1223 if (propertyType == 0) { 1224 break; 1225 } 1226 final long size = readUint64(header); 1227 switch (propertyType) { 1228 case NID.kEmptyStream: { 1229 isEmptyStream = readBits(header, numFilesInt); 1230 break; 1231 } 1232 case NID.kEmptyFile: { 1233 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1234 break; 1235 } 1236 
case NID.kAnti: { 1237 isAnti = readBits(header, isEmptyStream.cardinality()); 1238 break; 1239 } 1240 case NID.kName: { 1241 /* final int external = */ getUnsignedByte(header); 1242 final byte[] names = new byte[(int) (size - 1)]; 1243 final int namesLength = names.length; 1244 get(header, names); 1245 int nextFile = 0; 1246 int nextName = 0; 1247 for (int i = 0; i < namesLength; i += 2) { 1248 if (names[i] == 0 && names[i + 1] == 0) { 1249 checkEntryIsInitialized(fileMap, nextFile); 1250 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1251 nextName = i + 2; 1252 nextFile++; 1253 } 1254 } 1255 if (nextName != namesLength || nextFile != numFilesInt) { 1256 throw new IOException("Error parsing file names"); 1257 } 1258 break; 1259 } 1260 case NID.kCTime: { 1261 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1262 /* final int external = */ getUnsignedByte(header); 1263 for (int i = 0; i < numFilesInt; i++) { 1264 checkEntryIsInitialized(fileMap, i); 1265 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1266 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1267 if (entryAtIndex.getHasCreationDate()) { 1268 entryAtIndex.setCreationDate(getLong(header)); 1269 } 1270 } 1271 break; 1272 } 1273 case NID.kATime: { 1274 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1275 /* final int external = */ getUnsignedByte(header); 1276 for (int i = 0; i < numFilesInt; i++) { 1277 checkEntryIsInitialized(fileMap, i); 1278 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1279 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1280 if (entryAtIndex.getHasAccessDate()) { 1281 entryAtIndex.setAccessDate(getLong(header)); 1282 } 1283 } 1284 break; 1285 } 1286 case NID.kMTime: { 1287 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1288 /* final int external = */ getUnsignedByte(header); 1289 for (int i = 0; i < numFilesInt; i++) { 1290 checkEntryIsInitialized(fileMap, i); 1291 final 
SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1292 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1293 if (entryAtIndex.getHasLastModifiedDate()) { 1294 entryAtIndex.setLastModifiedDate(getLong(header)); 1295 } 1296 } 1297 break; 1298 } 1299 case NID.kWinAttributes: { 1300 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1301 /* final int external = */ getUnsignedByte(header); 1302 for (int i = 0; i < numFilesInt; i++) { 1303 checkEntryIsInitialized(fileMap, i); 1304 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1305 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1306 if (entryAtIndex.getHasWindowsAttributes()) { 1307 entryAtIndex.setWindowsAttributes(getInt(header)); 1308 } 1309 } 1310 break; 1311 } 1312 case NID.kDummy: { 1313 // 7z 9.20 asserts the content is all zeros and ignores the property 1314 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1315 1316 skipBytesFully(header, size); 1317 break; 1318 } 1319 1320 default: { 1321 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1322 skipBytesFully(header, size); 1323 break; 1324 } 1325 } 1326 } 1327 int nonEmptyFileCounter = 0; 1328 int emptyFileCounter = 0; 1329 for (int i = 0; i < numFilesInt; i++) { 1330 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1331 if (entryAtIndex == null) { 1332 continue; 1333 } 1334 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1335 if (entryAtIndex.hasStream()) { 1336 if (archive.subStreamsInfo == null) { 1337 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1338 } 1339 entryAtIndex.setDirectory(false); 1340 entryAtIndex.setAntiItem(false); 1341 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1342 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1343 
entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1344 if (entryAtIndex.getSize() < 0) { 1345 throw new IOException("broken archive, entry with negative size"); 1346 } 1347 ++nonEmptyFileCounter; 1348 } else { 1349 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1350 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1351 entryAtIndex.setHasCrc(false); 1352 entryAtIndex.setSize(0); 1353 ++emptyFileCounter; 1354 } 1355 } 1356 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1357 calculateStreamMap(archive); 1358 } 1359 1360 private Folder readFolder(final ByteBuffer header) throws IOException { 1361 final Folder folder = new Folder(); 1362 1363 final long numCoders = readUint64(header); 1364 final Coder[] coders = new Coder[(int) numCoders]; 1365 long totalInStreams = 0; 1366 long totalOutStreams = 0; 1367 for (int i = 0; i < coders.length; i++) { 1368 final int bits = getUnsignedByte(header); 1369 final int idSize = bits & 0xf; 1370 final boolean isSimple = (bits & 0x10) == 0; 1371 final boolean hasAttributes = (bits & 0x20) != 0; 1372 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1373 1374 final byte[] decompressionMethodId = new byte[idSize]; 1375 get(header, decompressionMethodId); 1376 final long numInStreams; 1377 final long numOutStreams; 1378 if (isSimple) { 1379 numInStreams = 1; 1380 numOutStreams = 1; 1381 } else { 1382 numInStreams = readUint64(header); 1383 numOutStreams = readUint64(header); 1384 } 1385 totalInStreams += numInStreams; 1386 totalOutStreams += numOutStreams; 1387 byte[] properties = null; 1388 if (hasAttributes) { 1389 final long propertiesSize = readUint64(header); 1390 properties = new byte[(int) propertiesSize]; 1391 get(header, properties); 1392 } 1393 // would need to keep looping as above: 1394 if (moreAlternativeMethods) { 1395 throw new IOException("Alternative methods are 
unsupported, please report. " + // NOSONAR 1396 "The reference implementation doesn't support them either."); 1397 } 1398 coders[i] = new Coder(decompressionMethodId, numInStreams, numOutStreams, properties); 1399 } 1400 folder.coders = coders; 1401 folder.totalInputStreams = totalInStreams; 1402 folder.totalOutputStreams = totalOutStreams; 1403 1404 final long numBindPairs = totalOutStreams - 1; 1405 final BindPair[] bindPairs = new BindPair[(int) numBindPairs]; 1406 for (int i = 0; i < bindPairs.length; i++) { 1407 bindPairs[i] = new BindPair(readUint64(header), readUint64(header)); 1408 } 1409 folder.bindPairs = bindPairs; 1410 1411 final long numPackedStreams = totalInStreams - numBindPairs; 1412 final long[] packedStreams = new long[(int) numPackedStreams]; 1413 if (numPackedStreams == 1) { 1414 int i; 1415 for (i = 0; i < (int) totalInStreams; i++) { 1416 if (folder.findBindPairForInStream(i) < 0) { 1417 break; 1418 } 1419 } 1420 packedStreams[0] = i; 1421 } else { 1422 for (int i = 0; i < (int) numPackedStreams; i++) { 1423 packedStreams[i] = readUint64(header); 1424 } 1425 } 1426 folder.packedStreams = packedStreams; 1427 1428 return folder; 1429 } 1430 1431 private void readFully(final ByteBuffer buf) throws IOException { 1432 buf.rewind(); 1433 IOUtils.readFully(channel, buf); 1434 buf.flip(); 1435 } 1436 1437 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 1438 final int pos = header.position(); 1439 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 1440 stats.assertValidity(maxMemoryLimitKb); 1441 header.position(pos); 1442 1443 int nid = getUnsignedByte(header); 1444 1445 if (nid == NID.kArchiveProperties) { 1446 readArchiveProperties(header); 1447 nid = getUnsignedByte(header); 1448 } 1449 1450 if (nid == NID.kAdditionalStreamsInfo) { 1451 throw new IOException("Additional streams unsupported"); 1452 // nid = getUnsignedByte(header); 1453 } 1454 1455 if (nid == NID.kMainStreamsInfo) { 
1456 readStreamsInfo(header, archive); 1457 nid = getUnsignedByte(header); 1458 } 1459 1460 if (nid == NID.kFilesInfo) { 1461 readFilesInfo(header, archive); 1462 nid = getUnsignedByte(header); 1463 } 1464 } 1465 1466 private Archive readHeaders(final byte[] password) throws IOException { 1467 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN); 1468 readFully(buf); 1469 final byte[] signature = new byte[6]; 1470 buf.get(signature); 1471 if (!Arrays.equals(signature, sevenZSignature)) { 1472 throw new IOException("Bad 7z signature"); 1473 } 1474 // 7zFormat.txt has it wrong - it's first major then minor 1475 final byte archiveVersionMajor = buf.get(); 1476 final byte archiveVersionMinor = buf.get(); 1477 if (archiveVersionMajor != 0) { 1478 throw new IOException(String.format("Unsupported 7z version (%d,%d)", archiveVersionMajor, archiveVersionMinor)); 1479 } 1480 1481 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1482 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1483 if (startHeaderCrc == 0) { 1484 // This is an indication of a corrupt header - peek the next 20 bytes 1485 final long currentPosition = channel.position(); 1486 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1487 readFully(peekBuf); 1488 channel.position(currentPosition); 1489 // Header invalid if all data is 0 1490 while (peekBuf.hasRemaining()) { 1491 if (peekBuf.get() != 0) { 1492 headerLooksValid = true; 1493 break; 1494 } 1495 } 1496 } else { 1497 headerLooksValid = true; 1498 } 1499 1500 if (headerLooksValid) { 1501 return initializeArchive(readStartHeader(startHeaderCrc), password, true); 1502 } 1503 // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. 
1504 if (tryToRecoverBrokenArchives) { 1505 return tryToLocateEndHeader(password); 1506 } 1507 throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" 1508 + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" + " prematurely."); 1509 } 1510 1511 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1512 archive.packPos = readUint64(header); 1513 final int numPackStreamsInt = (int) readUint64(header); 1514 int nid = getUnsignedByte(header); 1515 if (nid == NID.kSize) { 1516 archive.packSizes = new long[numPackStreamsInt]; 1517 for (int i = 0; i < archive.packSizes.length; i++) { 1518 archive.packSizes[i] = readUint64(header); 1519 } 1520 nid = getUnsignedByte(header); 1521 } 1522 1523 if (nid == NID.kCRC) { 1524 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 1525 archive.packCrcs = new long[numPackStreamsInt]; 1526 for (int i = 0; i < numPackStreamsInt; i++) { 1527 if (archive.packCrcsDefined.get(i)) { 1528 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 1529 } 1530 } 1531 1532 nid = getUnsignedByte(header); 1533 } 1534 } 1535 1536 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 1537 // using Stream rather than ByteBuffer for the benefit of the 1538 // built-in CRC check 1539 try (DataInputStream dataInputStream = new DataInputStream( 1540 new CRC32VerifyingInputStream(new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 1541 final long nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 1542 if (nextHeaderOffset < 0 || nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 1543 throw new IOException("nextHeaderOffset is out of bounds"); 1544 } 1545 final long nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 1546 final long nextHeaderEnd = nextHeaderOffset + nextHeaderSize; 1547 if (nextHeaderEnd < nextHeaderOffset || 
nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 1548 throw new IOException("nextHeaderSize is out of bounds"); 1549 } 1550 final long nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 1551 return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc); 1552 } 1553 } 1554 1555 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1556 int nid = getUnsignedByte(header); 1557 1558 if (nid == NID.kPackInfo) { 1559 readPackInfo(header, archive); 1560 nid = getUnsignedByte(header); 1561 } 1562 1563 if (nid == NID.kUnpackInfo) { 1564 readUnpackInfo(header, archive); 1565 nid = getUnsignedByte(header); 1566 } else { 1567 // archive without unpack/coders info 1568 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 1569 } 1570 1571 if (nid == NID.kSubStreamsInfo) { 1572 readSubStreamsInfo(header, archive); 1573 nid = getUnsignedByte(header); 1574 } 1575 } 1576 1577 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1578 for (final Folder folder : archive.folders) { 1579 folder.numUnpackSubStreams = 1; 1580 } 1581 long unpackStreamsCount = archive.folders.length; 1582 1583 int nid = getUnsignedByte(header); 1584 if (nid == NID.kNumUnpackStream) { 1585 unpackStreamsCount = 0; 1586 for (final Folder folder : archive.folders) { 1587 final long numStreams = readUint64(header); 1588 folder.numUnpackSubStreams = (int) numStreams; 1589 unpackStreamsCount += numStreams; 1590 } 1591 nid = getUnsignedByte(header); 1592 } 1593 1594 final int totalUnpackStreams = (int) unpackStreamsCount; 1595 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(totalUnpackStreams); 1596 int nextUnpackStream = 0; 1597 for (final Folder folder : archive.folders) { 1598 if (folder.numUnpackSubStreams == 0) { 1599 continue; 1600 } 1601 long sum = 0; 1602 if (nid == NID.kSize) { 1603 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 1604 final long size = 
readUint64(header); 1605 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1606 sum += size; 1607 } 1608 } 1609 if (sum > folder.getUnpackSize()) { 1610 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1611 } 1612 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1613 } 1614 if (nid == NID.kSize) { 1615 nid = getUnsignedByte(header); 1616 } 1617 1618 int numDigests = 0; 1619 for (final Folder folder : archive.folders) { 1620 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1621 numDigests += folder.numUnpackSubStreams; 1622 } 1623 } 1624 1625 if (nid == NID.kCRC) { 1626 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1627 final long[] missingCrcs = new long[numDigests]; 1628 for (int i = 0; i < numDigests; i++) { 1629 if (hasMissingCrc.get(i)) { 1630 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1631 } 1632 } 1633 int nextCrc = 0; 1634 int nextMissingCrc = 0; 1635 for (final Folder folder : archive.folders) { 1636 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1637 subStreamsInfo.hasCrc.set(nextCrc, true); 1638 subStreamsInfo.crcs[nextCrc] = folder.crc; 1639 ++nextCrc; 1640 } else { 1641 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1642 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1643 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1644 ++nextCrc; 1645 ++nextMissingCrc; 1646 } 1647 } 1648 } 1649 1650 nid = getUnsignedByte(header); 1651 } 1652 1653 archive.subStreamsInfo = subStreamsInfo; 1654 } 1655 1656 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1657 int nid = getUnsignedByte(header); 1658 final int numFoldersInt = (int) readUint64(header); 1659 final Folder[] folders = new Folder[numFoldersInt]; 1660 archive.folders = folders; 1661 /* final int external = */ getUnsignedByte(header); 1662 for (int i = 0; i < numFoldersInt; i++) { 1663 folders[i] = 
readFolder(header); 1664 } 1665 1666 nid = getUnsignedByte(header); 1667 for (final Folder folder : folders) { 1668 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 1669 folder.unpackSizes = new long[(int) folder.totalOutputStreams]; 1670 for (int i = 0; i < folder.totalOutputStreams; i++) { 1671 folder.unpackSizes[i] = readUint64(header); 1672 } 1673 } 1674 1675 nid = getUnsignedByte(header); 1676 if (nid == NID.kCRC) { 1677 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 1678 for (int i = 0; i < numFoldersInt; i++) { 1679 if (crcsDefined.get(i)) { 1680 folders[i].hasCrc = true; 1681 folders[i].crc = 0xffffFFFFL & getInt(header); 1682 } else { 1683 folders[i].hasCrc = false; 1684 } 1685 } 1686 1687 nid = getUnsignedByte(header); 1688 } 1689 } 1690 1691 /** 1692 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1693 * 1694 * @param folderIndex the index of the folder to reopen 1695 * @param file the 7z entry to read 1696 * @throws IOException if exceptions occur when reading the 7z file 1697 */ 1698 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1699 deferredBlockStreams.clear(); 1700 if (currentFolderInputStream != null) { 1701 currentFolderInputStream.close(); 1702 currentFolderInputStream = null; 1703 } 1704 final Folder folder = archive.folders[folderIndex]; 1705 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1706 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1707 1708 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1709 } 1710 1711 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) throws IOException { 1712 final ArchiveStatistics stats = new ArchiveStatistics(); 1713 1714 int nid = getUnsignedByte(header); 
1715 1716 if (nid == NID.kArchiveProperties) { 1717 sanityCheckArchiveProperties(header); 1718 nid = getUnsignedByte(header); 1719 } 1720 1721 if (nid == NID.kAdditionalStreamsInfo) { 1722 throw new IOException("Additional streams unsupported"); 1723 // nid = getUnsignedByte(header); 1724 } 1725 1726 if (nid == NID.kMainStreamsInfo) { 1727 sanityCheckStreamsInfo(header, stats); 1728 nid = getUnsignedByte(header); 1729 } 1730 1731 if (nid == NID.kFilesInfo) { 1732 sanityCheckFilesInfo(header, stats); 1733 nid = getUnsignedByte(header); 1734 } 1735 1736 if (nid != NID.kEnd) { 1737 throw new IOException("Badly terminated header, found " + nid); 1738 } 1739 1740 return stats; 1741 } 1742 1743 private void sanityCheckArchiveProperties(final ByteBuffer header) throws IOException { 1744 int nid = getUnsignedByte(header); 1745 while (nid != NID.kEnd) { 1746 final int propertySize = assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 1747 if (skipBytesFully(header, propertySize) < propertySize) { 1748 throw new IOException("invalid property size"); 1749 } 1750 nid = getUnsignedByte(header); 1751 } 1752 } 1753 1754 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1755 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1756 1757 int emptyStreams = -1; 1758 while (true) { 1759 final int propertyType = getUnsignedByte(header); 1760 if (propertyType == 0) { 1761 break; 1762 } 1763 final long size = readUint64(header); 1764 switch (propertyType) { 1765 case NID.kEmptyStream: { 1766 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1767 break; 1768 } 1769 case NID.kEmptyFile: { 1770 if (emptyStreams == -1) { 1771 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1772 } 1773 readBits(header, emptyStreams); 1774 break; 1775 } 1776 case NID.kAnti: { 1777 if (emptyStreams == -1) { 1778 throw new IOException("Header 
format error: kEmptyStream must appear before kAnti"); 1779 } 1780 readBits(header, emptyStreams); 1781 break; 1782 } 1783 case NID.kName: { 1784 final int external = getUnsignedByte(header); 1785 if (external != 0) { 1786 throw new IOException("Not implemented"); 1787 } 1788 final int namesLength = assertFitsIntoNonNegativeInt("file names length", size - 1); 1789 if ((namesLength & 1) != 0) { 1790 throw new IOException("File names length invalid"); 1791 } 1792 1793 int filesSeen = 0; 1794 for (int i = 0; i < namesLength; i += 2) { 1795 final char c = getChar(header); 1796 if (c == 0) { 1797 filesSeen++; 1798 } 1799 } 1800 if (filesSeen != stats.numberOfEntries) { 1801 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " + stats.numberOfEntries + ")"); 1802 } 1803 break; 1804 } 1805 case NID.kCTime: { 1806 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1807 final int external = getUnsignedByte(header); 1808 if (external != 0) { 1809 throw new IOException("Not implemented"); 1810 } 1811 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1812 throw new IOException("invalid creation dates size"); 1813 } 1814 break; 1815 } 1816 case NID.kATime: { 1817 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1818 final int external = getUnsignedByte(header); 1819 if (external != 0) { 1820 throw new IOException("Not implemented"); 1821 } 1822 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1823 throw new IOException("invalid access dates size"); 1824 } 1825 break; 1826 } 1827 case NID.kMTime: { 1828 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1829 final int external = getUnsignedByte(header); 1830 if (external != 0) { 1831 throw new IOException("Not implemented"); 1832 } 1833 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1834 throw new IOException("invalid modification dates 
size"); 1835 } 1836 break; 1837 } 1838 case NID.kWinAttributes: { 1839 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1840 final int external = getUnsignedByte(header); 1841 if (external != 0) { 1842 throw new IOException("Not implemented"); 1843 } 1844 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1845 throw new IOException("invalid windows attributes size"); 1846 } 1847 break; 1848 } 1849 case NID.kStartPos: { 1850 throw new IOException("kStartPos is unsupported, please report"); 1851 } 1852 case NID.kDummy: { 1853 // 7z 9.20 asserts the content is all zeros and ignores the property 1854 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1855 1856 if (skipBytesFully(header, size) < size) { 1857 throw new IOException("Incomplete kDummy property"); 1858 } 1859 break; 1860 } 1861 1862 default: { 1863 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1864 if (skipBytesFully(header, size) < size) { 1865 throw new IOException("Incomplete property of type " + propertyType); 1866 } 1867 break; 1868 } 1869 } 1870 } 1871 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1872 } 1873 1874 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1875 1876 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1877 if (numCoders == 0) { 1878 throw new IOException("Folder without coders"); 1879 } 1880 stats.numberOfCoders += numCoders; 1881 1882 long totalOutStreams = 0; 1883 long totalInStreams = 0; 1884 for (int i = 0; i < numCoders; i++) { 1885 final int bits = getUnsignedByte(header); 1886 final int idSize = bits & 0xf; 1887 get(header, new byte[idSize]); 1888 1889 final boolean isSimple = (bits & 0x10) == 0; 1890 final boolean hasAttributes = (bits & 0x20) != 0; 1891 final boolean moreAlternativeMethods = (bits & 0x80) 
!= 0; 1892 if (moreAlternativeMethods) { 1893 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1894 "The reference implementation doesn't support them either."); 1895 } 1896 1897 if (isSimple) { 1898 totalInStreams++; 1899 totalOutStreams++; 1900 } else { 1901 totalInStreams += assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1902 totalOutStreams += assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1903 } 1904 1905 if (hasAttributes) { 1906 final int propertiesSize = assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1907 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1908 throw new IOException("invalid propertiesSize in folder"); 1909 } 1910 } 1911 } 1912 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1913 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1914 stats.numberOfOutStreams += totalOutStreams; 1915 stats.numberOfInStreams += totalInStreams; 1916 1917 if (totalOutStreams == 0) { 1918 throw new IOException("Total output streams can't be 0"); 1919 } 1920 1921 final int numBindPairs = assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1922 if (totalInStreams < numBindPairs) { 1923 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1924 } 1925 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1926 for (int i = 0; i < numBindPairs; i++) { 1927 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1928 if (totalInStreams <= inIndex) { 1929 throw new IOException("inIndex is bigger than number of inStreams"); 1930 } 1931 inStreamsBound.set(inIndex); 1932 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1933 if (totalOutStreams <= outIndex) { 1934 throw new IOException("outIndex is bigger than number of outStreams"); 1935 } 1936 } 1937 1938 final int numPackedStreams = 
assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1939 1940 if (numPackedStreams == 1) { 1941 if (inStreamsBound.nextClearBit(0) == -1) { 1942 throw new IOException("Couldn't find stream's bind pair index"); 1943 } 1944 } else { 1945 for (int i = 0; i < numPackedStreams; i++) { 1946 final int packedStreamIndex = assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1947 if (packedStreamIndex >= totalInStreams) { 1948 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1949 } 1950 } 1951 } 1952 1953 return (int) totalOutStreams; 1954 } 1955 1956 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1957 final long packPos = readUint64(header); 1958 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() || SIGNATURE_HEADER_SIZE + packPos < 0) { 1959 throw new IOException("packPos (" + packPos + ") is out of range"); 1960 } 1961 final long numPackStreams = readUint64(header); 1962 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 1963 int nid = getUnsignedByte(header); 1964 if (nid == NID.kSize) { 1965 long totalPackSizes = 0; 1966 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 1967 final long packSize = readUint64(header); 1968 totalPackSizes += packSize; 1969 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 1970 if (packSize < 0 || endOfPackStreams > channel.size() || endOfPackStreams < packPos) { 1971 throw new IOException("packSize (" + packSize + ") is out of range"); 1972 } 1973 } 1974 nid = getUnsignedByte(header); 1975 } 1976 1977 if (nid == NID.kCRC) { 1978 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams).cardinality(); 1979 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 1980 throw new IOException("invalid number of CRCs in PackInfo"); 1981 } 1982 nid = getUnsignedByte(header); 1983 } 
1984 1985 if (nid != NID.kEnd) { 1986 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 1987 } 1988 } 1989 1990 private void sanityCheckStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1991 int nid = getUnsignedByte(header); 1992 1993 if (nid == NID.kPackInfo) { 1994 sanityCheckPackInfo(header, stats); 1995 nid = getUnsignedByte(header); 1996 } 1997 1998 if (nid == NID.kUnpackInfo) { 1999 sanityCheckUnpackInfo(header, stats); 2000 nid = getUnsignedByte(header); 2001 } 2002 2003 if (nid == NID.kSubStreamsInfo) { 2004 sanityCheckSubStreamsInfo(header, stats); 2005 nid = getUnsignedByte(header); 2006 } 2007 2008 if (nid != NID.kEnd) { 2009 throw new IOException("Badly terminated StreamsInfo"); 2010 } 2011 } 2012 2013 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2014 2015 int nid = getUnsignedByte(header); 2016 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 2017 if (nid == NID.kNumUnpackStream) { 2018 for (int i = 0; i < stats.numberOfFolders; i++) { 2019 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 2020 } 2021 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 2022 nid = getUnsignedByte(header); 2023 } else { 2024 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 2025 } 2026 2027 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 2028 2029 if (nid == NID.kSize) { 2030 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2031 if (numUnpackSubStreams == 0) { 2032 continue; 2033 } 2034 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 2035 final long size = readUint64(header); 2036 if (size < 0) { 2037 throw new IOException("negative unpackSize"); 2038 } 2039 } 2040 } 2041 nid = getUnsignedByte(header); 2042 } 2043 2044 int numDigests = 0; 2045 if 
(numUnpackSubStreamsPerFolder.isEmpty()) { 2046 numDigests = stats.folderHasCrc == null ? stats.numberOfFolders : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 2047 } else { 2048 int folderIdx = 0; 2049 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2050 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null || !stats.folderHasCrc.get(folderIdx++)) { 2051 numDigests += numUnpackSubStreams; 2052 } 2053 } 2054 } 2055 2056 if (nid == NID.kCRC) { 2057 assertFitsIntoNonNegativeInt("numDigests", numDigests); 2058 final int missingCrcs = readAllOrBits(header, numDigests).cardinality(); 2059 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 2060 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 2061 } 2062 nid = getUnsignedByte(header); 2063 } 2064 2065 if (nid != NID.kEnd) { 2066 throw new IOException("Badly terminated SubStreamsInfo"); 2067 } 2068 } 2069 2070 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2071 int nid = getUnsignedByte(header); 2072 if (nid != NID.kFolder) { 2073 throw new IOException("Expected kFolder, got " + nid); 2074 } 2075 final long numFolders = readUint64(header); 2076 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 2077 final int external = getUnsignedByte(header); 2078 if (external != 0) { 2079 throw new IOException("External unsupported"); 2080 } 2081 2082 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 2083 for (int i = 0; i < stats.numberOfFolders; i++) { 2084 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 2085 } 2086 2087 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 2088 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 2089 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 2090 throw new IOException("archive doesn't 
contain enough packed streams"); 2091 } 2092 2093 nid = getUnsignedByte(header); 2094 if (nid != NID.kCodersUnpackSize) { 2095 throw new IOException("Expected kCodersUnpackSize, got " + nid); 2096 } 2097 2098 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 2099 for (int i = 0; i < numberOfOutputStreams; i++) { 2100 final long unpackSize = readUint64(header); 2101 if (unpackSize < 0) { 2102 throw new IllegalArgumentException("negative unpackSize"); 2103 } 2104 } 2105 } 2106 2107 nid = getUnsignedByte(header); 2108 if (nid == NID.kCRC) { 2109 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 2110 final int crcsDefined = stats.folderHasCrc.cardinality(); 2111 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2112 throw new IOException("invalid number of CRCs in UnpackInfo"); 2113 } 2114 nid = getUnsignedByte(header); 2115 } 2116 2117 if (nid != NID.kEnd) { 2118 throw new IOException("Badly terminated UnpackInfo"); 2119 } 2120 } 2121 2122 /** 2123 * Skips all the entries if needed. Entries need to be skipped when: 2124 * <p> 2125 * 1. it's a random access 2. 
one of these 2 condition is meet : 2126 * </p> 2127 * <p> 2128 * 2.1 currentEntryIndex != entryIndex : this means there are some entries to be skipped(currentEntryIndex < entryIndex) or the entry has already been 2129 * read(currentEntryIndex > entryIndex) 2130 * </p> 2131 * <p> 2132 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: if the entry to be read is the current entry, but some data of it has been read before, 2133 * then we need to reopen the stream of the folder and skip all the entries before the current entries 2134 * </p> 2135 * 2136 * @param entryIndex the entry to be read 2137 * @param isInSameFolder are the entry to be read and the current entry in the same folder 2138 * @param folderIndex the index of the folder which contains the entry 2139 * @return true if there are entries actually skipped 2140 * @throws IOException there are exceptions when skipping entries 2141 * @since 1.21 2142 */ 2143 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 2144 final SevenZArchiveEntry file = archive.files[entryIndex]; 2145 // if the entry to be read is the current entry, and the entry has not 2146 // been read yet, then there's nothing we need to do 2147 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 2148 return false; 2149 } 2150 2151 // 1. if currentEntryIndex < entryIndex : 2152 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 2153 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 2154 // this means the entry has already been read before, and we need to reopen the 2155 // stream of the folder and skip all the entries before the current entries 2156 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 2157 if (isInSameFolder) { 2158 if (currentEntryIndex < entryIndex) { 2159 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 2160 filesToSkipStartIndex = currentEntryIndex + 1; 2161 } else { 2162 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 2163 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 2164 // from the start entry of the folder again 2165 reopenFolderInputStream(folderIndex, file); 2166 } 2167 } 2168 2169 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 2170 final SevenZArchiveEntry fileToSkip = archive.files[i]; 2171 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 2172 if (fileToSkip.getHasCrc()) { 2173 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 2174 } 2175 deferredBlockStreams.add(fileStreamToSkip); 2176 2177 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 2178 fileToSkip.setContentMethods(file.getContentMethods()); 2179 } 2180 return true; 2181 } 2182 2183 @Override 2184 public String toString() { 2185 return archive.toString(); 2186 } 2187 2188 private Archive tryToLocateEndHeader(final byte[] password) throws IOException { 2189 final ByteBuffer nidBuf = ByteBuffer.allocate(1); 2190 final long searchLimit = 1024L * 1024 * 1; 2191 // Main header, plus bytes that readStartHeader would read 2192 final long previousDataSize = channel.position() + 20; 2193 final 
long minPos; 2194 // Determine minimal position - can't start before current position 2195 if (channel.position() + searchLimit > channel.size()) { 2196 minPos = channel.position(); 2197 } else { 2198 minPos = channel.size() - searchLimit; 2199 } 2200 long pos = channel.size() - 1; 2201 // Loop: Try from end of archive 2202 while (pos > minPos) { 2203 pos--; 2204 channel.position(pos); 2205 nidBuf.rewind(); 2206 if (channel.read(nidBuf) < 1) { 2207 throw new EOFException(); 2208 } 2209 final int nid = nidBuf.array()[0]; 2210 // First indicator: Byte equals one of these header identifiers 2211 if (nid == NID.kEncodedHeader || nid == NID.kHeader) { 2212 try { 2213 // Try to initialize Archive structure from here 2214 final long nextHeaderOffset = pos - previousDataSize; 2215 final long nextHeaderSize = channel.size() - pos; 2216 final StartHeader startHeader = new StartHeader(nextHeaderOffset, nextHeaderSize, 0); 2217 final Archive result = initializeArchive(startHeader, password, false); 2218 // Sanity check: There must be some data... 2219 if (result.packSizes.length > 0 && result.files.length > 0) { 2220 return result; 2221 } 2222 } catch (final Exception ignore) { 2223 // Wrong guess... 2224 } 2225 } 2226 } 2227 throw new IOException("Start header corrupt and unable to guess end header"); 2228 } 2229}