001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.sevenz; 020 021import static java.nio.charset.StandardCharsets.UTF_16LE; 022 023import java.io.BufferedInputStream; 024import java.io.ByteArrayInputStream; 025import java.io.Closeable; 026import java.io.DataInputStream; 027import java.io.EOFException; 028import java.io.File; 029import java.io.FilterInputStream; 030import java.io.IOException; 031import java.io.InputStream; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.nio.channels.Channels; 035import java.nio.channels.SeekableByteChannel; 036import java.nio.file.Files; 037import java.nio.file.OpenOption; 038import java.nio.file.Path; 039import java.nio.file.StandardOpenOption; 040import java.util.ArrayList; 041import java.util.Arrays; 042import java.util.BitSet; 043import java.util.EnumSet; 044import java.util.LinkedHashMap; 045import java.util.LinkedList; 046import java.util.List; 047import java.util.Map; 048import java.util.Objects; 049import java.util.zip.CRC32; 050import java.util.zip.CheckedInputStream; 051 052import org.apache.commons.compress.MemoryLimitException; 053import 
org.apache.commons.compress.archivers.ArchiveException; 054import org.apache.commons.compress.utils.ByteUtils; 055import org.apache.commons.compress.utils.IOUtils; 056import org.apache.commons.compress.utils.InputStreamStatistics; 057import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; 058import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin; 059import org.apache.commons.io.build.AbstractStreamBuilder; 060import org.apache.commons.io.input.BoundedInputStream; 061import org.apache.commons.io.input.ChecksumInputStream; 062import org.apache.commons.lang3.ArrayUtils; 063 064/** 065 * Reads a 7z file, using SeekableByteChannel under the covers. 066 * <p> 067 * The 7z file format is a flexible container that can contain many compression and encryption types, but at the moment only Copy, LZMA, LZMA2, BZIP2, 068 * Deflate and AES-256 + SHA-256 are supported. 069 * </p> 070 * <p> 071 * The format is very Windows/Intel specific, so it uses little-endian byte order, doesn't store user/group or permission bits, and represents times using NTFS 072 * timestamps (100 nanosecond units since 1 January 1601). Hence the official tools recommend against using it for backup purposes on *nix, and recommend 073 * .tar.7z or .tar.lzma or .tar.xz instead. 074 * </p> 075 * <p> 076 * Both the header and file contents may be compressed and/or encrypted. With both encrypted, neither file names nor file contents can be read, but the use of 077 * encryption isn't plausibly deniable. 078 * </p> 079 * <p> 080 * Multi volume archives can be read by concatenating the parts in correct order - either manually or by using {@link 081 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} for example. 
082 * </p> 083 * 084 * @NotThreadSafe 085 * @since 1.6 086 */ 087public class SevenZFile implements Closeable { 088 089 private static final class ArchiveStatistics { 090 private int numberOfPackedStreams; 091 private long numberOfCoders; 092 private long numberOfOutStreams; 093 private long numberOfInStreams; 094 private long numberOfUnpackSubStreams; 095 private int numberOfFolders; 096 private BitSet folderHasCrc; 097 private int numberOfEntries; 098 private int numberOfEntriesWithStream; 099 100 /** 101 * Asserts the validity of the given input. 102 * 103 * @param maxMemoryLimitKiB kibibytes (KiB) to test. 104 * @throws IOException Thrown on basic assertion failure. 105 */ 106 void assertValidity(final int maxMemoryLimitKiB) throws IOException { 107 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 108 throw new IOException("archive with entries but no folders"); 109 } 110 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 111 throw new IOException("archive doesn't contain enough substreams for entries"); 112 } 113 114 final long memoryNeededInKiB = estimateSize() / 1024; 115 if (maxMemoryLimitKiB < memoryNeededInKiB) { 116 throw new MemoryLimitException(memoryNeededInKiB, maxMemoryLimitKiB); 117 } 118 } 119 120 private long bindPairSize() { 121 return 16; 122 } 123 124 /** 125 * Gets a size estimate in bytes. 126 * 127 * @return a size estimate in bytes. 128 */ 129 private long coderSize() { 130 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 131 + 16 + 4 /* properties, guess */ 132 ; 133 } 134 135 /** 136 * Gets a size estimate in bytes. 137 * 138 * @return a size estimate in bytes. 139 */ 140 private long entrySize() { 141 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 142 } 143 144 /** 145 * Gets a size estimate in bytes. 146 * 147 * @return a size estimate in bytes. 
148 */ 149 long estimateSize() { 150 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 151 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 152 + numberOfFolders * folderSize() /* folders in Archive */ 153 + numberOfCoders * coderSize() /* coders in Folder */ 154 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 155 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 156 + 8L * numberOfOutStreams /* unpackSizes in Folder */ 157 + numberOfEntries * entrySize() /* files in Archive */ 158 + streamMapSize(); 159 return 2 * lowerBound /* conservative guess */; 160 } 161 162 private long folderSize() { 163 return 30; /* nested arrays are accounted for separately */ 164 } 165 166 private long streamMapSize() { 167 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 168 + 8 * numberOfPackedStreams /* packStreamOffsets */ 169 + 4 * numberOfEntries /* fileFolderIndex */ 170 ; 171 } 172 173 @Override 174 public String toString() { 175 return String.format("Archive with %,d entries in %,d folders, estimated size %,d KiB.", numberOfEntries, numberOfFolders, estimateSize() / 1024L); 176 } 177 } 178 179 /** 180 * Builds new instances of {@link SevenZFile}. 
     *
     * @since 1.26.0
     */
    public static class Builder extends AbstractStreamBuilder<SevenZFile, Builder> {

        /** Default for the memory limit: {@link Integer#MAX_VALUE}. */
        static final int MEMORY_LIMIT_IN_KB = Integer.MAX_VALUE;
        /** Default for {@link #setUseDefaultNameForUnnamedEntries(boolean)}. */
        static final boolean USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES = false;
        /** Default for {@link #setTryToRecoverBrokenArchives(boolean)}. */
        static final boolean TRY_TO_RECOVER_BROKEN_ARCHIVES = false;

        private SeekableByteChannel seekableByteChannel;
        private String defaultName = DEFAULT_FILE_NAME;
        private byte[] password;
        private int maxMemoryLimitKiB = MEMORY_LIMIT_IN_KB;
        private boolean useDefaultNameForUnnamedEntries = USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES;
        private boolean tryToRecoverBrokenArchives = TRY_TO_RECOVER_BROKEN_ARCHIVES;

        /**
         * Creates a new {@link SevenZFile} from this builder's settings.
         * <p>
         * The channel is chosen in this order: the channel set with {@link #setSeekableByteChannel(SeekableByteChannel)}, an in-memory channel over the
         * byte array when the origin is a byte array, otherwise a channel opened on the origin's path using the configured open options (or
         * {@link StandardOpenOption#READ} when none are configured). The archive description is the default name in the first two cases and the absolute
         * path otherwise.
         * </p>
         *
         * @return a new instance.
         * @throws IOException if opening the channel or reading the archive fails.
         */
        @SuppressWarnings("resource") // Caller closes
        @Override
        public SevenZFile get() throws IOException {
            final SeekableByteChannel actualChannel;
            final String actualDescription;
            if (seekableByteChannel != null) {
                actualChannel = seekableByteChannel;
                actualDescription = defaultName;
            } else if (checkOrigin() instanceof ByteArrayOrigin) {
                actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray());
                actualDescription = defaultName;
            } else {
                OpenOption[] openOptions = getOpenOptions();
                if (ArrayUtils.isEmpty(openOptions)) {
                    openOptions = new OpenOption[] { StandardOpenOption.READ };
                }
                final Path path = getPath();
                actualChannel = Files.newByteChannel(path, openOptions);
                actualDescription = path.toAbsolutePath().toString();
            }
            // NOTE(review): closeOnError is true only for a caller-supplied channel, so a channel opened above from a path or byte array
            // is not closed when the constructor fails. The deprecated File constructors pass true for channels they open - confirm intent.
            final boolean closeOnError = seekableByteChannel != null;
            return new SevenZFile(actualChannel, actualDescription, password, closeOnError, maxMemoryLimitKiB, useDefaultNameForUnnamedEntries,
                    tryToRecoverBrokenArchives);
        }

        /**
         * Sets the default name.
         *
         * @param defaultName the default name.
         * @return {@code this} instance.
227 */ 228 public Builder setDefaultName(final String defaultName) { 229 this.defaultName = defaultName; 230 return this; 231 } 232 233 /** 234 * Sets the maximum amount of memory in kilobytes to use for parsing the archive and during extraction. 235 * <p> 236 * Not all codecs honor this setting. Currently only LZMA and LZMA2 are supported. 237 * </p> 238 * 239 * @param maxMemoryLimitKiB the max memory limit in kilobytes. 240 * @return {@code this} instance. 241 */ 242 public Builder setMaxMemoryLimitKb(final int maxMemoryLimitKiB) { 243 this.maxMemoryLimitKiB = maxMemoryLimitKiB / 1024; 244 return this; 245 } 246 247 /** 248 * Sets the maximum amount of memory in kilobytes to use for parsing the archive and during extraction. 249 * <p> 250 * Not all codecs honor this setting. Currently only LZMA and LZMA2 are supported. 251 * </p> 252 * 253 * @param maxMemoryLimitKiB the max memory limit in kibibytes. 254 * @return {@code this} instance. 255 * @since 1.28.0 256 */ 257 public Builder setMaxMemoryLimitKiB(final int maxMemoryLimitKiB) { 258 this.maxMemoryLimitKiB = maxMemoryLimitKiB; 259 return this; 260 } 261 262 /** 263 * Sets the password. 264 * 265 * @param password the password. 266 * @return {@code this} instance. 267 */ 268 public Builder setPassword(final byte[] password) { 269 this.password = password != null ? password.clone() : null; 270 return this; 271 } 272 273 /** 274 * Sets the password. 275 * 276 * @param password the password. 277 * @return {@code this} instance. 278 */ 279 public Builder setPassword(final char[] password) { 280 this.password = password != null ? AES256SHA256Decoder.utf16Decode(password.clone()) : null; 281 return this; 282 } 283 284 /** 285 * Sets the password. 286 * 287 * @param password the password. 288 * @return {@code this} instance. 289 */ 290 public Builder setPassword(final String password) { 291 this.password = password != null ? 
AES256SHA256Decoder.utf16Decode(password.toCharArray()) : null; 292 return this; 293 } 294 295 /** 296 * Sets the input channel. 297 * 298 * @param seekableByteChannel the input channel. 299 * @return {@code this} instance. 300 */ 301 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 302 this.seekableByteChannel = seekableByteChannel; 303 return this; 304 } 305 306 /** 307 * Sets whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 308 * <p> 309 * This special kind of broken archive is encountered when mutli volume archives are closed prematurely. If you enable this option SevenZFile will trust 310 * data that looks as if it could contain metadata of an archive and allocate big amounts of memory. It is strongly recommended to not enable this 311 * option without setting {@link #setMaxMemoryLimitKb(int)} at the same time. 312 * </p> 313 * 314 * @param tryToRecoverBrokenArchives whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 315 * @return {@code this} instance. 316 */ 317 public Builder setTryToRecoverBrokenArchives(final boolean tryToRecoverBrokenArchives) { 318 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 319 return this; 320 } 321 322 /** 323 * Sets whether entries without a name should get their names set to the archive's default file name. 324 * 325 * @param useDefaultNameForUnnamedEntries whether entries without a name should get their names set to the archive's default file name. 326 * @return {@code this} instance. 
327 */ 328 public Builder setUseDefaultNameForUnnamedEntries(final boolean useDefaultNameForUnnamedEntries) { 329 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 330 return this; 331 } 332 333 } 334 335 static final int SIGNATURE_HEADER_SIZE = 32; 336 337 private static final String DEFAULT_FILE_NAME = "unknown archive"; 338 339 /** Shared with SevenZOutputFile and tests, neither mutates it. */ 340 static final byte[] sevenZSignature = { // NOSONAR 341 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C }; 342 343 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 344 if (value > Integer.MAX_VALUE || value < 0) { 345 throw new IOException(String.format("Cannot handle % %,d", what, value)); 346 } 347 return (int) value; 348 } 349 350 /** 351 * Creates a new Builder. 352 * 353 * @return a new Builder. 354 * @since 1.26.0 355 */ 356 public static Builder builder() { 357 return new Builder(); 358 } 359 360 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 361 final int remaining = buf.remaining(); 362 if (remaining < expectRemaining) { 363 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 364 } 365 return buf; 366 } 367 368 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 369 checkEndOfFile(buf, to.length).get(to); 370 } 371 372 private static char getChar(final ByteBuffer buf) throws EOFException { 373 return checkEndOfFile(buf, Character.BYTES).getChar(); 374 } 375 376 private static int getInt(final ByteBuffer buf) throws EOFException { 377 return checkEndOfFile(buf, Integer.BYTES).getInt(); 378 } 379 380 private static long getLong(final ByteBuffer buf) throws EOFException { 381 return checkEndOfFile(buf, Long.BYTES).getLong(); 382 } 383 384 private static int getUnsignedByte(final ByteBuffer buf) throws 
EOFException { 385 if (!buf.hasRemaining()) { 386 throw new EOFException(); 387 } 388 return buf.get() & 0xff; 389 } 390 391 /** 392 * Checks if the signature matches what is expected for a 7z file. 393 * 394 * @param signature the bytes to check 395 * @param length the number of bytes to check 396 * @return true, if this is the signature of a 7z archive. 397 * @since 1.8 398 */ 399 public static boolean matches(final byte[] signature, final int length) { 400 if (length < sevenZSignature.length) { 401 return false; 402 } 403 for (int i = 0; i < sevenZSignature.length; i++) { 404 if (signature[i] != sevenZSignature[i]) { 405 return false; 406 } 407 } 408 return true; 409 } 410 411 private static SeekableByteChannel newByteChannel(final File file) throws IOException { 412 return Files.newByteChannel(file.toPath(), EnumSet.of(StandardOpenOption.READ)); 413 } 414 415 private static long readUint64(final ByteBuffer in) throws IOException { 416 // long rather than int as it might get shifted beyond the range of an int 417 final long firstByte = getUnsignedByte(in); 418 int mask = 0x80; 419 long value = 0; 420 for (int i = 0; i < 8; i++) { 421 if ((firstByte & mask) == 0) { 422 return value | (firstByte & mask - 1) << 8 * i; 423 } 424 final long nextByte = getUnsignedByte(in); 425 value |= nextByte << 8 * i; 426 mask >>>= 1; 427 } 428 return value; 429 } 430 431 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 432 if (bytesToSkip < 1) { 433 return 0; 434 } 435 final int current = input.position(); 436 final int maxSkip = input.remaining(); 437 if (maxSkip < bytesToSkip) { 438 bytesToSkip = maxSkip; 439 } 440 input.position(current + (int) bytesToSkip); 441 return bytesToSkip; 442 } 443 444 private final String fileName; 445 private SeekableByteChannel channel; 446 private final Archive archive; 447 private int currentEntryIndex = -1; 448 private int currentFolderIndex = -1; 449 private InputStream currentFolderInputStream; 450 private byte[] 
password; 451 private long compressedBytesReadFromCurrentEntry; 452 private long uncompressedBytesReadFromCurrentEntry; 453 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 454 private final int maxMemoryLimitKiB; 455 private final boolean useDefaultNameForUnnamedEntries; 456 457 private final boolean tryToRecoverBrokenArchives; 458 459 /** 460 * Reads a file as unencrypted 7z archive. 461 * 462 * @param fileName the file to read. 463 * @throws IOException if reading the archive fails. 464 * @deprecated Use {@link Builder#get()}. 465 */ 466 @Deprecated 467 public SevenZFile(final File fileName) throws IOException { 468 this(fileName, SevenZFileOptions.DEFAULT); 469 } 470 471 /** 472 * Reads a file as 7z archive 473 * 474 * @param file the file to read 475 * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password. 476 * @throws IOException if reading the archive fails 477 * @deprecated Use {@link Builder#get()}. 478 */ 479 @SuppressWarnings("resource") // caller closes 480 @Deprecated 481 public SevenZFile(final File file, final byte[] password) throws IOException { 482 this(newByteChannel(file), file.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); 483 } 484 485 /** 486 * Reads a file as 7z archive 487 * 488 * @param file the file to read 489 * @param password optional password if the archive is encrypted 490 * @throws IOException if reading the archive fails 491 * @since 1.17 492 * @deprecated Use {@link Builder#get()}. 493 */ 494 @Deprecated 495 public SevenZFile(final File file, final char[] password) throws IOException { 496 this(file, password, SevenZFileOptions.DEFAULT); 497 } 498 499 /** 500 * Reads a file as 7z archive with additional options. 
     *
     * @param file     the file to read
     * @param password optional password if the archive is encrypted
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @SuppressWarnings("resource") // caller closes
    @Deprecated
    public SevenZFile(final File file, final char[] password, final SevenZFileOptions options) throws IOException {
        this(newByteChannel(file), // NOSONAR
                file.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options);
    }

    /**
     * Reads a file as unencrypted 7z archive
     *
     * @param file    the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final File file, final SevenZFileOptions options) throws IOException {
        this(file, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName) throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password) throws IOException {
        this(channel, fileName, password, false, SevenZFileOptions.DEFAULT);
    }

    // Primary constructor that every other constructor and the Builder delegate to.
    // It stores the settings, reads the archive headers using the given password and keeps a private copy of the password.
    // If reading the headers fails and closeOnError is true, the channel is closed before the exception propagates.
    private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, final int maxMemoryLimitKiB,
            final boolean useDefaultNameForUnnamedEntries, final boolean tryToRecoverBrokenArchives) throws IOException {
        boolean succeeded = false;
        this.channel = channel;
        this.fileName = fileName;
        this.maxMemoryLimitKiB = maxMemoryLimitKiB;
        this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries;
        this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives;
        try {
            archive = readHeaders(password);
            // Keep a copy so that later changes to the caller's array do not affect this instance.
            if (password != null) {
                this.password = Arrays.copyOf(password, password.length);
            } else {
                this.password = null;
            }
            succeeded = true;
        } finally {
            if (!succeeded && closeOnError) {
                this.channel.close();
            }
        }
    }

    /**
     * Constructs a new instance.
     *
     * @param channel      the channel to read.
     * @param fileName     name of the archive - only used for error reporting.
     * @param password     optional password if the archive is encrypted.
     * @param closeOnError closes the channel on error.
     * @param options      options.
     * @throws IOException if reading the archive fails
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError,
            final SevenZFileOptions options) throws IOException {
        this(channel, fileName, password, closeOnError, options.getMaxMemoryLimitInKb(), options.getUseDefaultNameForUnnamedEntries(),
                options.getTryToRecoverBrokenArchives());
    }

    /**
     * Reads a SeekableByteChannel as 7z archive
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, final SevenZFileOptions options) throws IOException {
        this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) throws IOException {
        this(channel, fileName, null, false, options);
    }

    // Builds the chain of input streams that decodes one folder.
    // The channel is positioned at folderOffset and wrapped in a stream bounded to the size of the folder's first packed stream.
    // Every byte read from that stream is added to compressedBytesReadFromCurrentEntry.
    // One decoder per coder of the folder is then stacked on top, and the resulting method configurations are stored on the entry.
    // If the folder has a CRC, the result is additionally wrapped in a CRC32-verifying stream.
    // Throws IOException if a coder has more than one input or output stream, or if positioning the channel or creating a decoder fails.
    private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry)
            throws IOException {
        channel.position(folderOffset);
        InputStream inputStreamStack = new FilterInputStream(
                new BufferedInputStream(new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]))) {
            // Accumulates the number of compressed bytes consumed on behalf of the current entry.
            private void count(final int c) {
                compressedBytesReadFromCurrentEntry += c;
            }

            @Override
            public int read() throws IOException {
                final int r = in.read();
                if (r >= 0) {
                    count(1);
                }
                return r;
            }

            @Override
            public int read(final byte[] b) throws IOException {
                return read(b, 0, b.length);
            }

            @Override
            public int read(final byte[] b, final int off, final int len) throws IOException {
                if (len == 0) {
                    return 0;
                }
                final int r = in.read(b, off, len);
                if (r >= 0) {
                    count(r);
                }
                return r;
            }
        };
        // Configurations are prepended, so the list ends up in the reverse of the order in which the coders are applied here.
        final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>();
        for (final Coder coder : folder.getOrderedCoders()) {
            if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
                throw new IOException("Multi input/output stream coders are not yet supported");
            }
            final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId);
            inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKiB);
            methods.addFirst(new SevenZMethodConfiguration(method, Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack)));
        }
        entry.setContentMethods(methods);
        if (folder.hasCrc) {
            // @formatter:off
            return ChecksumInputStream.builder()
                    .setChecksum(new CRC32())
                    .setInputStream(inputStreamStack)
                    .setCountThreshold(folder.getUnpackSize())
                    .setExpectedChecksumValue(folder.crc)
                    .get();
            // @formatter:on
        }
        return inputStreamStack;
    }

    /**
     * Builds the decoding stream for the entry to be read. This method may be called from a random access(getInputStream) or sequential access(getNextEntry).
     * If this method is called from a random access, some entries may need to be skipped(we put them to the deferredBlockStreams and skip them when actually
     * needed to improve the performance)
     *
     * @param entryIndex     the index of the entry to be read
     * @param isRandomAccess is this called in a random access
     * @throws IOException if there are exceptions when reading the file
     */
    private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException {
        if (archive.streamMap == null) {
            throw new IOException("Archive doesn't contain stream information to read entries");
        }
        final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex];
        if (folderIndex < 0) {
            // The entry is not backed by any folder, so there is nothing to decode for it.
            deferredBlockStreams.clear();
            // TODO: previously it'd return an empty stream?
            // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0);
            return;
        }
        final SevenZArchiveEntry file = archive.files[entryIndex];
        boolean isInSameFolder = false;
        if (currentFolderIndex == folderIndex) {
            // (COMPRESS-320).
            // The current entry is within the same (potentially opened) folder. The
            // previous stream has to be fully decoded before we can start reading
            // but don't do it eagerly -- if the user skips over the entire folder nothing
            // is effectively decompressed.
            if (entryIndex > 0) {
                file.setContentMethods(archive.files[entryIndex - 1].getContentMethods());
            }

            // if this is called in a random access, then the content methods of previous entry may be null
            // the content methods should be set to methods of the first entry as it must not be null,
            // and the content methods would only be set if the content methods was not set
            if (isRandomAccess && file.getContentMethods() == null) {
                final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex];
                final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex];
                file.setContentMethods(folderFirstFile.getContentMethods());
            }
            isInSameFolder = true;
        } else {
            currentFolderIndex = folderIndex;
            // We're opening a new folder. Discard any queued streams/ folder stream.
            reopenFolderInputStream(folderIndex, file);
        }

        boolean haveSkippedEntries = false;
        if (isRandomAccess) {
            // entries will only need to be skipped if it's a random access
            haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex);
        }

        if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) {
            // we don't need to add another entry to the deferredBlockStreams when :
            // 1. If this method is called in a random access and the entry index
            // to be read equals to the current entry index, the input stream
            // has already been put in the deferredBlockStreams
            // 2. If this entry has not been read(which means no entries are skipped)
            return;
        }

        // Limit the view on the shared folder stream to this entry's size; closing the entry stream must not close the folder stream.
        InputStream fileStream = BoundedInputStream.builder()
                .setInputStream(currentFolderInputStream)
                .setMaxCount(file.getSize())
                .setPropagateClose(false)
                .get();
        if (file.getHasCrc()) {
            // @formatter:off
            fileStream = ChecksumInputStream.builder()
                    .setChecksum(new CRC32())
                    .setInputStream(fileStream)
                    .setExpectedChecksumValue(file.getCrcValue())
                    .get();
            // @formatter:on
        }

        deferredBlockStreams.add(fileStream);
    }

    // Computes the index tables linking folders, packed streams and files, and stores them in archive.streamMap:
    // - the index of the first packed stream of each folder,
    // - the offset of each packed stream, as the running sum of the preceding pack sizes,
    // - the index of the first file of each folder,
    // - the folder index of each file, or -1 for a file with an empty stream met while no folder is partially assigned.
    // Throws IOException if the files need more folders than the archive has.
    private void calculateStreamMap(final Archive archive) throws IOException {
        int nextFolderPackStreamIndex = 0;
        final int numFolders = ArrayUtils.getLength(archive.folders);
        final int[] folderFirstPackStreamIndex = new int[numFolders];
        for (int i = 0; i < numFolders; i++) {
            folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
            nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
        }
        long nextPackStreamOffset = 0;
        final int numPackSizes = archive.packSizes.length;
        final long[] packStreamOffsets = new long[numPackSizes];
        for (int i = 0; i < numPackSizes; i++) {
            packStreamOffsets[i] = nextPackStreamOffset;
            nextPackStreamOffset += archive.packSizes[i];
        }
        final int[] folderFirstFileIndex = new int[numFolders];
        final int[] fileFolderIndex = new int[archive.files.length];
        int nextFolderIndex = 0;
        // Number of substreams of the current folder that have already been assigned to files.
        int nextFolderUnpackStreamIndex = 0;
        for (int i = 0; i < archive.files.length; i++) {
            if (archive.files[i].isEmptyStream() && nextFolderUnpackStreamIndex == 0) {
                fileFolderIndex[i] = -1;
                continue;
            }
            if (nextFolderUnpackStreamIndex == 0) {
                // Advance to the next folder that has substreams; folders without any still get this file recorded as their first file.
                for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
                    folderFirstFileIndex[nextFolderIndex] = i;
                    if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
                        break;
                    }
                }
                if (nextFolderIndex >= archive.folders.length) {
                    throw new IOException("Too few folders in archive");
                }
            }
            fileFolderIndex[i] = nextFolderIndex;
            if (archive.files[i].isEmptyStream()) {
                // An empty-stream file inside a partially assigned folder does not consume a substream.
                continue;
            }
            ++nextFolderUnpackStreamIndex;
            if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
                ++nextFolderIndex;
                nextFolderUnpackStreamIndex = 0;
            }
        }
        archive.streamMap = new StreamMap(folderFirstPackStreamIndex, packStreamOffsets, folderFirstFileIndex, fileFolderIndex);
    }

    // Ensures the map holds an entry for the given index, adding a new SevenZArchiveEntry if there is none yet.
    private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) {
        archiveEntries.computeIfAbsent(index, i -> new SevenZArchiveEntry());
    }

    /**
     * Closes the archive.
     * <p>
     * Calling this method again after the channel has been closed has no effect.
     * </p>
     *
     * @throws IOException if closing the file fails
     */
    @Override
    public void close() throws IOException {
        if (channel != null) {
            try {
                channel.close();
            } finally {
                // Runs even when closing fails: drop the channel and overwrite the password copy held by this instance.
                channel = null;
                if (password != null) {
                    Arrays.fill(password, (byte) 0);
                }
                password = null;
            }
        }
    }

    private InputStream getCurrentStream() throws IOException {
        if (archive.files[currentEntryIndex].getSize() == 0) {
            return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY);
        }
        if (deferredBlockStreams.isEmpty()) {
            throw new IllegalStateException("No current 7z entry (call getNextEntry() first).");
        }
        while (deferredBlockStreams.size() > 1) {
            // In solid compression mode we need to decompress all leading folder'
            // streams to get access to an entry. We defer this until really needed
            // so that entire blocks can be skipped without wasting time for decompression.
967 try (InputStream stream = deferredBlockStreams.remove(0)) { 968 org.apache.commons.io.IOUtils.skip(stream, Long.MAX_VALUE, org.apache.commons.io.IOUtils::byteArray); 969 } 970 compressedBytesReadFromCurrentEntry = 0; 971 } 972 return deferredBlockStreams.get(0); 973 } 974 975 /** 976 * Gets a default file name from the archive name - if known. 977 * <p> 978 * This implements the same heuristics the 7z tools use. In 7z's case if an archive contains entries without a name - i.e. 979 * {@link SevenZArchiveEntry#getName} returns {@code null} - then its command line and GUI tools will use this default name when extracting the entries. 980 * </p> 981 * 982 * @return null if the name of the archive is unknown. Otherwise, if the name of the archive has got any extension, it is stripped and the remainder 983 * returned. Finally, if the name of the archive hasn't got any extension, then a {@code ~} character is appended to the archive name. 984 * @since 1.19 985 */ 986 public String getDefaultName() { 987 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 988 return null; 989 } 990 991 final String lastSegment = new File(fileName).getName(); 992 final int dotPos = lastSegment.lastIndexOf("."); 993 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 994 return lastSegment.substring(0, dotPos); 995 } 996 return lastSegment + "~"; 997 } 998 999 /** 1000 * Gets a copy of meta-data of all archive entries. 1001 * <p> 1002 * This method only provides meta-data, the entries cannot be used to read the contents, you still need to process all entries in order using 1003 * {@link #getNextEntry} for that. 1004 * </p> 1005 * <p> 1006 * The content methods are only available for entries that have already been reached via {@link #getNextEntry}. 1007 * </p> 1008 * 1009 * @return a copy of meta-data of all archive entries. 
1010 * @since 1.11 1011 */ 1012 public Iterable<SevenZArchiveEntry> getEntries() { 1013 return new ArrayList<>(Arrays.asList(archive.files)); 1014 } 1015 1016 /** 1017 * Gets an InputStream for reading the contents of the given entry. 1018 * <p> 1019 * For archives using solid compression randomly accessing entries will be significantly slower than reading the archive sequentially. 1020 * </p> 1021 * 1022 * @param entry the entry to get the stream for. 1023 * @return a stream to read the entry from. 1024 * @throws IOException if unable to create an input stream from the entry 1025 * @since 1.20 1026 */ 1027 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 1028 int entryIndex = -1; 1029 for (int i = 0; i < archive.files.length; i++) { 1030 if (entry == archive.files[i]) { 1031 entryIndex = i; 1032 break; 1033 } 1034 } 1035 1036 if (entryIndex < 0) { 1037 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + fileName); 1038 } 1039 1040 buildDecodingStream(entryIndex, true); 1041 currentEntryIndex = entryIndex; 1042 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 1043 return getCurrentStream(); 1044 } 1045 1046 /** 1047 * Gets the next Archive Entry in this archive. 
1048 * 1049 * @return the next entry, or {@code null} if there are no more entries 1050 * @throws IOException if the next entry could not be read 1051 */ 1052 public SevenZArchiveEntry getNextEntry() throws IOException { 1053 if (currentEntryIndex >= archive.files.length - 1) { 1054 return null; 1055 } 1056 ++currentEntryIndex; 1057 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 1058 if (entry.getName() == null && useDefaultNameForUnnamedEntries) { 1059 entry.setName(getDefaultName()); 1060 } 1061 buildDecodingStream(currentEntryIndex, false); 1062 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 1063 return entry; 1064 } 1065 1066 /** 1067 * Gets statistics for bytes read from the current entry. 1068 * 1069 * @return statistics for bytes read from the current entry 1070 * @since 1.17 1071 */ 1072 public InputStreamStatistics getStatisticsForCurrentEntry() { 1073 return new InputStreamStatistics() { 1074 @Override 1075 public long getCompressedCount() { 1076 return compressedBytesReadFromCurrentEntry; 1077 } 1078 1079 @Override 1080 public long getUncompressedCount() { 1081 return uncompressedBytesReadFromCurrentEntry; 1082 } 1083 }; 1084 } 1085 1086 /** 1087 * Tests if any data of current entry has been read or not. This is achieved by comparing the bytes remaining to read and the size of the file. 
1088 * 1089 * @return true if any data of current entry has been read 1090 * @since 1.21 1091 */ 1092 private boolean hasCurrentEntryBeenRead() { 1093 boolean hasCurrentEntryBeenRead = false; 1094 if (!deferredBlockStreams.isEmpty()) { 1095 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1096 // get the bytes remaining to read, and compare it with the size of 1097 // the file to figure out if the file has been read 1098 if (currentEntryInputStream instanceof ChecksumInputStream) { 1099 hasCurrentEntryBeenRead = ((ChecksumInputStream) currentEntryInputStream).getRemaining() != archive.files[currentEntryIndex].getSize(); 1100 } else if (currentEntryInputStream instanceof BoundedInputStream) { 1101 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getRemaining() != archive.files[currentEntryIndex].getSize(); 1102 } 1103 } 1104 return hasCurrentEntryBeenRead; 1105 } 1106 1107 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 1108 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 1109 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 1110 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 1111 if (verifyCrc) { 1112 final long position = channel.position(); 1113 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 1114 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 1115 throw new IOException("Problem computing NextHeader CRC-32"); 1116 } 1117 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 1118 throw new IOException("NextHeader CRC-32 mismatch"); 1119 } 1120 channel.position(position); 1121 } 1122 Archive archive = new Archive(); 1123 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 1124 readFully(buf); 1125 int nid = getUnsignedByte(buf); 
1126 if (nid == NID.kEncodedHeader) { 1127 buf = readEncodedHeader(buf, archive, password); 1128 // Archive gets rebuilt with the new header 1129 archive = new Archive(); 1130 nid = getUnsignedByte(buf); 1131 } 1132 if (nid != NID.kHeader) { 1133 throw new IOException("Broken or unsupported archive: no Header"); 1134 } 1135 readHeader(buf, archive); 1136 archive.subStreamsInfo = null; 1137 return archive; 1138 } 1139 1140 /** 1141 * Reads a byte of data. 1142 * 1143 * @return the byte read, or -1 if end of input is reached 1144 * @throws IOException if an I/O error has occurred 1145 */ 1146 public int read() throws IOException { 1147 @SuppressWarnings("resource") // does not allocate 1148 final int b = getCurrentStream().read(); 1149 if (b >= 0) { 1150 uncompressedBytesReadFromCurrentEntry++; 1151 } 1152 return b; 1153 } 1154 1155 /** 1156 * Reads data into an array of bytes. 1157 * 1158 * @param b the array to write data to 1159 * @return the number of bytes read, or -1 if end of input is reached 1160 * @throws IOException if an I/O error has occurred 1161 */ 1162 public int read(final byte[] b) throws IOException { 1163 return read(b, 0, b.length); 1164 } 1165 1166 /** 1167 * Reads data into an array of bytes. 
1168 * 1169 * @param b the array to write data to 1170 * @param off offset into the buffer to start filling at 1171 * @param len of bytes to read 1172 * @return the number of bytes read, or -1 if end of input is reached 1173 * @throws IOException if an I/O error has occurred 1174 */ 1175 public int read(final byte[] b, final int off, final int len) throws IOException { 1176 if (len == 0) { 1177 return 0; 1178 } 1179 @SuppressWarnings("resource") // does not allocate 1180 final int current = getCurrentStream().read(b, off, len); 1181 if (current > 0) { 1182 uncompressedBytesReadFromCurrentEntry += current; 1183 } 1184 return current; 1185 } 1186 1187 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1188 final int areAllDefined = getUnsignedByte(header); 1189 final BitSet bits; 1190 if (areAllDefined != 0) { 1191 bits = new BitSet(size); 1192 for (int i = 0; i < size; i++) { 1193 bits.set(i, true); 1194 } 1195 } else { 1196 bits = readBits(header, size); 1197 } 1198 return bits; 1199 } 1200 1201 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1202 // FIXME: the reference implementation just throws them away? 
1203 long nid = readUint64(input); 1204 while (nid != NID.kEnd) { 1205 final long propertySize = readUint64(input); 1206 final byte[] property = new byte[(int) propertySize]; 1207 get(input, property); 1208 nid = readUint64(input); 1209 } 1210 } 1211 1212 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1213 final BitSet bits = new BitSet(size); 1214 int mask = 0; 1215 int cache = 0; 1216 for (int i = 0; i < size; i++) { 1217 if (mask == 0) { 1218 mask = 0x80; 1219 cache = getUnsignedByte(header); 1220 } 1221 bits.set(i, (cache & mask) != 0); 1222 mask >>>= 1; 1223 } 1224 return bits; 1225 } 1226 1227 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, final byte[] password) throws IOException { 1228 final int pos = header.position(); 1229 final ArchiveStatistics stats = new ArchiveStatistics(); 1230 sanityCheckStreamsInfo(header, stats); 1231 stats.assertValidity(maxMemoryLimitKiB); 1232 header.position(pos); 1233 1234 readStreamsInfo(header, archive); 1235 1236 if (ArrayUtils.isEmpty(archive.folders)) { 1237 throw new IOException("no folders, can't read encoded header"); 1238 } 1239 if (ArrayUtils.isEmpty(archive.packSizes)) { 1240 throw new IOException("no packed streams, can't read encoded header"); 1241 } 1242 1243 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
1244 final Folder folder = archive.folders[0]; 1245 final int firstPackStreamIndex = 0; 1246 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0; 1247 1248 channel.position(folderOffset); 1249 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]); 1250 for (final Coder coder : folder.getOrderedCoders()) { 1251 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1252 throw new IOException("Multi input/output stream coders are not yet supported"); 1253 } 1254 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, // NOSONAR 1255 folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKiB); 1256 } 1257 if (folder.hasCrc) { 1258 // @formatter:off 1259 inputStreamStack = ChecksumInputStream.builder() 1260 .setChecksum(new CRC32()) 1261 .setInputStream(inputStreamStack) 1262 .setCountThreshold(folder.getUnpackSize()) 1263 .setExpectedChecksumValue(folder.crc) 1264 .get(); 1265 // @formatter:on 1266 } 1267 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1268 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1269 if (nextHeader.length < unpackSize) { 1270 throw new IOException("premature end of stream"); 1271 } 1272 inputStreamStack.close(); 1273 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1274 } 1275 1276 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1277 final int numFilesInt = (int) readUint64(header); 1278 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1279 BitSet isEmptyStream = null; 1280 BitSet isEmptyFile = null; 1281 BitSet isAnti = null; 1282 while (true) { 1283 final int propertyType = getUnsignedByte(header); 1284 if (propertyType == 0) { 1285 break; 1286 } 1287 final long size = readUint64(header); 1288 switch (propertyType) { 1289 case NID.kEmptyStream: { 1290 isEmptyStream = 
readBits(header, numFilesInt); 1291 break; 1292 } 1293 case NID.kEmptyFile: { 1294 isEmptyFile = readBits(header, ArchiveException.requireNonNull(isEmptyStream, () -> "isEmptyStream for " + archive).cardinality()); 1295 break; 1296 } 1297 case NID.kAnti: { 1298 isAnti = readBits(header, ArchiveException.requireNonNull(isEmptyStream, () -> "isEmptyStream for " + archive).cardinality()); 1299 break; 1300 } 1301 case NID.kName: { 1302 /* final int external = */ getUnsignedByte(header); 1303 final byte[] names = new byte[(int) (size - 1)]; 1304 final int namesLength = names.length; 1305 get(header, names); 1306 int nextFile = 0; 1307 int nextName = 0; 1308 for (int i = 0; i < namesLength; i += 2) { 1309 if (names[i] == 0 && names[i + 1] == 0) { 1310 checkEntryIsInitialized(fileMap, nextFile); 1311 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1312 nextName = i + 2; 1313 nextFile++; 1314 } 1315 } 1316 if (nextName != namesLength || nextFile != numFilesInt) { 1317 throw new IOException("Error parsing file names"); 1318 } 1319 break; 1320 } 1321 case NID.kCTime: { 1322 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1323 /* final int external = */ getUnsignedByte(header); 1324 for (int i = 0; i < numFilesInt; i++) { 1325 checkEntryIsInitialized(fileMap, i); 1326 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1327 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1328 if (entryAtIndex.getHasCreationDate()) { 1329 entryAtIndex.setCreationDate(getLong(header)); 1330 } 1331 } 1332 break; 1333 } 1334 case NID.kATime: { 1335 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1336 /* final int external = */ getUnsignedByte(header); 1337 for (int i = 0; i < numFilesInt; i++) { 1338 checkEntryIsInitialized(fileMap, i); 1339 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1340 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1341 if (entryAtIndex.getHasAccessDate()) { 1342 
entryAtIndex.setAccessDate(getLong(header)); 1343 } 1344 } 1345 break; 1346 } 1347 case NID.kMTime: { 1348 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1349 /* final int external = */ getUnsignedByte(header); 1350 for (int i = 0; i < numFilesInt; i++) { 1351 checkEntryIsInitialized(fileMap, i); 1352 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1353 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1354 if (entryAtIndex.getHasLastModifiedDate()) { 1355 entryAtIndex.setLastModifiedDate(getLong(header)); 1356 } 1357 } 1358 break; 1359 } 1360 case NID.kWinAttributes: { 1361 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1362 /* final int external = */ getUnsignedByte(header); 1363 for (int i = 0; i < numFilesInt; i++) { 1364 checkEntryIsInitialized(fileMap, i); 1365 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1366 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1367 if (entryAtIndex.getHasWindowsAttributes()) { 1368 entryAtIndex.setWindowsAttributes(getInt(header)); 1369 } 1370 } 1371 break; 1372 } 1373 case NID.kDummy: { 1374 // 7z 9.20 asserts the content is all zeros and ignores the property 1375 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1376 skipBytesFully(header, size); 1377 break; 1378 } 1379 default: { 1380 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1381 skipBytesFully(header, size); 1382 break; 1383 } 1384 } 1385 } 1386 int nonEmptyFileCounter = 0; 1387 int emptyFileCounter = 0; 1388 for (int i = 0; i < numFilesInt; i++) { 1389 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1390 if (entryAtIndex == null) { 1391 continue; 1392 } 1393 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1394 if (entryAtIndex.hasStream()) { 1395 if (archive.subStreamsInfo == null) { 1396 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1397 } 1398 
entryAtIndex.setDirectory(false); 1399 entryAtIndex.setAntiItem(false); 1400 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1401 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1402 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1403 if (entryAtIndex.getSize() < 0) { 1404 throw new IOException("broken archive, entry with negative size"); 1405 } 1406 ++nonEmptyFileCounter; 1407 } else { 1408 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1409 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1410 entryAtIndex.setHasCrc(false); 1411 entryAtIndex.setSize(0); 1412 ++emptyFileCounter; 1413 } 1414 } 1415 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1416 calculateStreamMap(archive); 1417 } 1418 1419 private Folder readFolder(final ByteBuffer header) throws IOException { 1420 final Folder folder = new Folder(); 1421 1422 final long numCoders = readUint64(header); 1423 final Coder[] coders = new Coder[(int) numCoders]; 1424 long totalInStreams = 0; 1425 long totalOutStreams = 0; 1426 for (int i = 0; i < coders.length; i++) { 1427 final int bits = getUnsignedByte(header); 1428 final int idSize = bits & 0xf; 1429 final boolean isSimple = (bits & 0x10) == 0; 1430 final boolean hasAttributes = (bits & 0x20) != 0; 1431 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1432 1433 final byte[] decompressionMethodId = new byte[idSize]; 1434 get(header, decompressionMethodId); 1435 final long numInStreams; 1436 final long numOutStreams; 1437 if (isSimple) { 1438 numInStreams = 1; 1439 numOutStreams = 1; 1440 } else { 1441 numInStreams = readUint64(header); 1442 numOutStreams = readUint64(header); 1443 } 1444 totalInStreams += numInStreams; 1445 totalOutStreams += numOutStreams; 1446 byte[] properties = null; 1447 if (hasAttributes) { 1448 final long propertiesSize 
= readUint64(header); 1449 properties = new byte[(int) propertiesSize]; 1450 get(header, properties); 1451 } 1452 // would need to keep looping as above: 1453 if (moreAlternativeMethods) { 1454 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1455 "The reference implementation doesn't support them either."); 1456 } 1457 coders[i] = new Coder(decompressionMethodId, numInStreams, numOutStreams, properties); 1458 } 1459 folder.coders = coders; 1460 folder.totalInputStreams = totalInStreams; 1461 folder.totalOutputStreams = totalOutStreams; 1462 1463 final long numBindPairs = totalOutStreams - 1; 1464 final BindPair[] bindPairs = new BindPair[(int) numBindPairs]; 1465 for (int i = 0; i < bindPairs.length; i++) { 1466 bindPairs[i] = new BindPair(readUint64(header), readUint64(header)); 1467 } 1468 folder.bindPairs = bindPairs; 1469 1470 final long numPackedStreams = totalInStreams - numBindPairs; 1471 final long[] packedStreams = new long[(int) numPackedStreams]; 1472 if (numPackedStreams == 1) { 1473 int i; 1474 for (i = 0; i < (int) totalInStreams; i++) { 1475 if (folder.findBindPairForInStream(i) < 0) { 1476 break; 1477 } 1478 } 1479 packedStreams[0] = i; 1480 } else { 1481 for (int i = 0; i < (int) numPackedStreams; i++) { 1482 packedStreams[i] = readUint64(header); 1483 } 1484 } 1485 folder.packedStreams = packedStreams; 1486 1487 return folder; 1488 } 1489 1490 private void readFully(final ByteBuffer buf) throws IOException { 1491 buf.rewind(); 1492 IOUtils.readFully(channel, buf); 1493 buf.flip(); 1494 } 1495 1496 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 1497 final int pos = header.position(); 1498 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 1499 stats.assertValidity(maxMemoryLimitKiB); 1500 header.position(pos); 1501 1502 int nid = getUnsignedByte(header); 1503 1504 if (nid == NID.kArchiveProperties) { 1505 readArchiveProperties(header); 1506 
nid = getUnsignedByte(header); 1507 } 1508 1509 if (nid == NID.kAdditionalStreamsInfo) { 1510 throw new IOException("Additional streams unsupported"); 1511 // nid = getUnsignedByte(header); 1512 } 1513 1514 if (nid == NID.kMainStreamsInfo) { 1515 readStreamsInfo(header, archive); 1516 nid = getUnsignedByte(header); 1517 } 1518 1519 if (nid == NID.kFilesInfo) { 1520 readFilesInfo(header, archive); 1521 nid = getUnsignedByte(header); 1522 } 1523 } 1524 1525 private Archive readHeaders(final byte[] password) throws IOException { 1526 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN); 1527 readFully(buf); 1528 final byte[] signature = new byte[6]; 1529 buf.get(signature); 1530 if (!Arrays.equals(signature, sevenZSignature)) { 1531 throw new IOException("Bad 7z signature"); 1532 } 1533 // 7zFormat.txt has it wrong - it's first major then minor 1534 final byte archiveVersionMajor = buf.get(); 1535 final byte archiveVersionMinor = buf.get(); 1536 if (archiveVersionMajor != 0) { 1537 throw new IOException(String.format("Unsupported 7z version (%d,%d)", archiveVersionMajor, archiveVersionMinor)); 1538 } 1539 1540 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1541 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1542 if (startHeaderCrc == 0) { 1543 // This is an indication of a corrupt header - peek the next 20 bytes 1544 final long currentPosition = channel.position(); 1545 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1546 readFully(peekBuf); 1547 channel.position(currentPosition); 1548 // Header invalid if all data is 0 1549 while (peekBuf.hasRemaining()) { 1550 if (peekBuf.get() != 0) { 1551 headerLooksValid = true; 1552 break; 1553 } 1554 } 1555 } else { 1556 headerLooksValid = true; 1557 } 1558 1559 if (headerLooksValid) { 1560 return initializeArchive(readStartHeader(startHeaderCrc), password, 
true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
        if (tryToRecoverBrokenArchives) {
            return tryToLocateEndHeader(password);
        }
        throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the"
                + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed prematurely.");
    }

    /**
     * Reads the pack info section of the header into {@code archive}: the pack position, the number of pack streams and, when the corresponding
     * property ids are present, the pack sizes and pack CRCs.
     *
     * @param header  the buffer positioned at the start of the pack info data
     * @param archive the archive model to populate
     * @throws IOException if reading from the header fails
     */
    private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException {
        archive.packPos = readUint64(header);
        // NOTE(review): unchecked narrowing - presumably the value has been validated by sanityCheckPackInfo before this is called; confirm with caller
        final int numPackStreamsInt = (int) readUint64(header);
        int nid = getUnsignedByte(header);
        if (nid == NID.kSize) {
            archive.packSizes = new long[numPackStreamsInt];
            for (int i = 0; i < archive.packSizes.length; i++) {
                archive.packSizes[i] = readUint64(header);
            }
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kCRC) {
            archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt);
            archive.packCrcs = new long[numPackStreamsInt];
            for (int i = 0; i < numPackStreamsInt; i++) {
                if (archive.packCrcsDefined.get(i)) {
                    // mask to treat the 32 bit value as unsigned
                    archive.packCrcs[i] = 0xffffFFFFL & getInt(header);
                }
            }
            // read one more
            getUnsignedByte(header);
        }
    }

    /**
     * Reads the 20 byte start header (next header offset, size and CRC, all little-endian) from the channel while verifying it against the given CRC.
     *
     * @param startHeaderCrc the expected CRC-32 of the 20 start header bytes
     * @return the parsed start header
     * @throws IOException if reading fails, or the offset or size point outside of the channel
     */
    private StartHeader readStartHeader(final long startHeaderCrc) throws IOException {
        // using Stream rather than ByteBuffer for the benefit of the built-in CRC check
        try (DataInputStream dataInputStream = new DataInputStream(ChecksumInputStream.builder()
                // @formatter:off
                .setChecksum(new CRC32())
                .setInputStream(new BoundedSeekableByteChannelInputStream(channel, 20))
                .setCountThreshold(20L)
                .setExpectedChecksumValue(startHeaderCrc)
                .get())) {
                // @formatter:on
            // Largest offset at which data following the signature header can still start or end. Comparing against this value
            // instead of adding SIGNATURE_HEADER_SIZE to an untrusted value avoids a long overflow that would bypass the check.
            final long maxOffset = channel.size() - SIGNATURE_HEADER_SIZE;
            // DataInputStream reads big-endian, the file stores little-endian values
            final long nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
            if (nextHeaderOffset < 0 || nextHeaderOffset > maxOffset) {
                throw new IOException("nextHeaderOffset is out of bounds");
            }
            final long nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
            final long nextHeaderEnd = nextHeaderOffset + nextHeaderSize;
            // the first comparison catches both a negative size and an overflow of the sum
            if (nextHeaderEnd < nextHeaderOffset || nextHeaderEnd > maxOffset) {
                throw new IOException("nextHeaderSize is out of bounds");
            }
            final long nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt());
            return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc);
        }
    }

    /**
     * Reads the streams info section of the header, delegating to the readers for pack info, unpack info and sub streams info when the corresponding
     * property ids are present. If there is no unpack info, {@code archive.folders} is set to an empty array.
     *
     * @param header  the buffer positioned at the start of the streams info data
     * @param archive the archive model to populate
     * @throws IOException if reading from the header fails
     */
    private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            readPackInfo(header, archive);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            readUnpackInfo(header, archive);
            nid = getUnsignedByte(header);
        } else {
            // archive without unpack/coders info
            archive.folders = Folder.EMPTY_FOLDER_ARRAY;
        }

        if (nid == NID.kSubStreamsInfo) {
            readSubStreamsInfo(header, archive);
            // consumes the id following the sub streams info; the value itself is not used
            nid = getUnsignedByte(header);
        }
    }

    /**
     * Reads the sub streams info section of the header: the number of unpack sub streams per folder, their unpack sizes and their CRCs, and stores the
     * result in {@code archive.subStreamsInfo}.
     * <p>
     * For every folder only the sizes of all but the last sub stream are stored in the header; the last size is the folder's unpack size minus the sum of
     * the others. A folder with exactly one sub stream and a folder CRC reuses that CRC for its sub stream.
     * </p>
     *
     * @param header  the buffer positioned at the start of the sub streams info data
     * @param archive the archive model to populate, its folders must already have been read
     * @throws IOException if reading from the header fails or the sub stream sizes of a folder exceed the folder's unpack size
     */
    private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
        // default when kNumUnpackStream is absent: one sub stream per folder
        for (final Folder folder : archive.folders) {
            folder.numUnpackSubStreams = 1;
        }
        long unpackStreamsCount = archive.folders.length;

        int nid = getUnsignedByte(header);
        if (nid == NID.kNumUnpackStream) {
            unpackStreamsCount = 0;
            for (final Folder folder : archive.folders) {
                final long numStreams = readUint64(header);
                folder.numUnpackSubStreams = (int) numStreams;
                unpackStreamsCount += numStreams;
            }
            nid = getUnsignedByte(header);
        }

        final int totalUnpackStreams = (int) unpackStreamsCount;
        final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(totalUnpackStreams);
        int nextUnpackStream = 0;
        for (final Folder folder : archive.folders) {
            if (folder.numUnpackSubStreams == 0) {
                continue;
            }
            long sum = 0;
            if (nid == NID.kSize) {
                // only the first n - 1 sizes are stored explicitly
                for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) {
                    final long size = readUint64(header);
                    subStreamsInfo.unpackSizes[nextUnpackStream++] = size;
                    sum += size;
                }
            }
            if (sum > folder.getUnpackSize()) {
                throw new IOException("sum of unpack sizes of folder exceeds total unpack size");
            }
            // the last size is whatever remains of the folder's unpack size
            subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum;
        }
        if (nid == NID.kSize) {
            nid = getUnsignedByte(header);
        }

        // digests are only stored for sub streams that are not already covered by their folder's CRC
        int numDigests = 0;
        for (final Folder folder : archive.folders) {
            if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) {
                numDigests += folder.numUnpackSubStreams;
            }
        }

        if (nid == NID.kCRC) {
            final BitSet hasMissingCrc = readAllOrBits(header, numDigests);
            final long[] missingCrcs = new long[numDigests];
            for (int i = 0; i < numDigests; i++) {
                if (hasMissingCrc.get(i)) {
                    missingCrcs[i] = 0xffffFFFFL & getInt(header);
                }
            }
            int nextCrc = 0;
            int nextMissingCrc = 0;
            for (final Folder folder : archive.folders) {
                if (folder.numUnpackSubStreams == 1 && folder.hasCrc) {
                    subStreamsInfo.hasCrc.set(nextCrc, true);
                    subStreamsInfo.crcs[nextCrc] = folder.crc;
                    ++nextCrc;
                } else {
                    for (int i = 0; i < folder.numUnpackSubStreams; i++) {
                        subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc));
                        subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc];
                        ++nextCrc;
                        ++nextMissingCrc;
                    }
                }
            }

            // consumes the id following the CRCs; the value itself is not used
            nid = getUnsignedByte(header);
        }

        archive.subStreamsInfo = subStreamsInfo;
    }

    /**
     * Reads the unpack info section of the header: the folders, the unpack sizes of each folder's output streams and, when present, the folder CRCs.
     *
     * @param header  the buffer positioned at the start of the unpack info data
     * @param archive the archive model to populate
     * @throws IOException if reading from the header fails
     */
    private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException {
        // the ids read into nid before the kCRC test are consumed without being verified here
        // NOTE(review): presumably they have been verified by sanityCheckUnpackInfo before - confirm with caller
        int nid = getUnsignedByte(header);
        final int numFoldersInt = (int) readUint64(header);
        final Folder[] folders = new Folder[numFoldersInt];
        archive.folders = folders;
        /* final int external = */ getUnsignedByte(header);
        for (int i = 0; i < numFoldersInt; i++) {
            folders[i] = readFolder(header);
        }

        nid = getUnsignedByte(header);
        for (final Folder folder : folders) {
            assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams);
            folder.unpackSizes = new long[(int) folder.totalOutputStreams];
            for (int i = 0; i < folder.totalOutputStreams; i++) {
                folder.unpackSizes[i] = readUint64(header);
            }
        }

        nid = getUnsignedByte(header);
        if (nid == NID.kCRC) {
            final BitSet crcsDefined = readAllOrBits(header, numFoldersInt);
            for (int i = 0; i < numFoldersInt; i++) {
                if (crcsDefined.get(i)) {
                    folders[i].hasCrc = true;
                    folders[i].crc = 0xffffFFFFL & getInt(header);
                } else {
                    folders[i].hasCrc = false;
                }
            }

            nid = getUnsignedByte(header);
        }
    }

    /**
     * Discards any queued streams and the current folder stream, and reopens the input stream of the given folder.
     *
     * @param folderIndex the index of the folder to reopen
     * @param file        the 7z entry to read
     * @throws IOException if exceptions occur when reading the 7z file
     */
    private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException {
        deferredBlockStreams.clear();
        if (currentFolderInputStream != null) {
            currentFolderInputStream.close();
            currentFolderInputStream = null;
        }
        final Folder folder = archive.folders[folderIndex];
        final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + archive.streamMap.packStreamOffsets[firstPackStreamIndex];

        currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
    }

    /**
     * Walks over the complete header, verifying its structure and collecting the counts needed to size the archive model, without building the model.
     *
     * @param header the buffer positioned at the start of the header data
     * @return the statistics collected while checking the header
     * @throws IOException if the header is malformed, not properly terminated or uses unsupported features
     */
    private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) throws IOException {
        final ArchiveStatistics stats = new ArchiveStatistics();

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            sanityCheckArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            // nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
            sanityCheckStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kFilesInfo) {
            sanityCheckFilesInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated header, found " + nid);
        }

        return stats;
    }

    /**
     * Skips over the archive properties, verifying that every property's declared size is actually available in the header.
     *
     * @param header the buffer positioned at the start of the archive properties
     * @throws IOException if a property size is invalid or reading from the header fails
     */
    private void sanityCheckArchiveProperties(final ByteBuffer header) throws IOException {
        long nid = readUint64(header);
        while (nid != NID.kEnd) {
            final int propertySize = assertFitsIntoNonNegativeInt("propertySize", readUint64(header));
            if (skipBytesFully(header, propertySize) < propertySize) {
                throw new IOException("invalid property size");
            }
            nid = readUint64(header);
        }
    }

    /**
     * Verifies the files info section of the header and records the number of entries and the number of entries with a stream in {@code stats}.
     *
     * @param header the buffer positioned at the start of the files info data
     * @param stats  the statistics to update
     * @throws IOException if the section is malformed, truncated or uses unsupported features
     */
    private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header));

        // -1 means "no kEmptyStream property seen so far"
        int emptyStreams = -1;
        while (true) {
            final int propertyType = getUnsignedByte(header);
            if (propertyType == 0) {
                break;
            }
            final long size = readUint64(header);
            switch (propertyType) {
            case NID.kEmptyStream: {
                emptyStreams = readBits(header, stats.numberOfEntries).cardinality();
                break;
            }
            case NID.kEmptyFile: {
                if (emptyStreams == -1) {
                    throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile");
                }
                readBits(header, emptyStreams);
                break;
            }
            case NID.kAnti: {
                if (emptyStreams == -1) {
                    throw new IOException("Header format error: kEmptyStream must appear before kAnti");
                }
                readBits(header, emptyStreams);
                break;
            }
            case NID.kName: {
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // size includes the "external" byte that has just been read
                final int namesLength = assertFitsIntoNonNegativeInt("file names length", size - 1);
                if ((namesLength & 1) != 0) {
                    throw new IOException("File names length invalid");
                }

                // names are sequences of two byte chars, each terminated by a 0 char
                int filesSeen = 0;
                for (int i = 0; i < namesLength; i += 2) {
                    final char c = getChar(header);
                    if (c == 0) {
                        filesSeen++;
                    }
                }
                if (filesSeen != stats.numberOfEntries) {
                    throw new IOException("Invalid number of file names (" + filesSeen + " instead of " + stats.numberOfEntries + ")");
                }
                break;
            }
            case NID.kCTime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // 8 bytes per defined time stamp, computed as long so a huge count cannot overflow int and bypass the check
                final long timesSize = 8L * timesDefined;
                if (skipBytesFully(header, timesSize) < timesSize) {
                    throw new IOException("invalid creation dates size");
                }
                break;
            }
            case NID.kATime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // 8 bytes per defined time stamp, computed as long so a huge count cannot overflow int and bypass the check
                final long timesSize = 8L * timesDefined;
                if (skipBytesFully(header, timesSize) < timesSize) {
                    throw new IOException("invalid access dates size");
                }
                break;
            }
            case NID.kMTime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // 8 bytes per defined time stamp, computed as long so a huge count cannot overflow int and bypass the check
                final long timesSize = 8L * timesDefined;
                if (skipBytesFully(header, timesSize) < timesSize) {
                    throw new IOException("invalid modification dates size");
                }
                break;
            }
            case NID.kWinAttributes: {
                final int attributesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // 4 bytes per defined attribute value, computed as long so a huge count cannot overflow int and bypass the check
                final long attributesSize = 4L * attributesDefined;
                if (skipBytesFully(header, attributesSize) < attributesSize) {
                    throw new IOException("invalid windows attributes size");
                }
                break;
            }
            case NID.kStartPos: {
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)

                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete kDummy property");
                }
                break;
            }

            default: {
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete property of type " + propertyType);
                }
                break;
            }
            }
        }
        stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0);
    }

    /**
     * Verifies the description of a single folder (its coders, bind pairs and packed streams) and adds the folder's coder and stream counts to
     * {@code stats}.
     *
     * @param header the buffer positioned at the start of the folder data
     * @param stats  the statistics to update
     * @return the total number of output streams of the folder
     * @throws IOException if the folder description is malformed, truncated or uses unsupported features
     */
    private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {

        final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header));
        if (numCoders == 0) {
            throw new IOException("Folder without coders");
        }
        stats.numberOfCoders += numCoders;

        long totalOutStreams = 0;
        long totalInStreams = 0;
        for (int i = 0; i < numCoders; i++) {
            final int bits = getUnsignedByte(header);
            // low nibble: length of the coder id, which is read and discarded
            final int idSize = bits & 0xf;
            get(header, new byte[idSize]);

            final boolean isSimple = (bits & 0x10) == 0;
            final boolean hasAttributes = (bits & 0x20) != 0;
            final boolean moreAlternativeMethods = (bits & 0x80) != 0;
            if (moreAlternativeMethods) {
                throw new IOException("Alternative methods are unsupported, please report. The reference implementation doesn't support them either.");
            }

            if (isSimple) {
                totalInStreams++;
                totalOutStreams++;
            } else {
                totalInStreams += assertFitsIntoNonNegativeInt("numInStreams", readUint64(header));
                totalOutStreams += assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header));
            }

            if (hasAttributes) {
                final int propertiesSize = assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header));
                if (skipBytesFully(header, propertiesSize) < propertiesSize) {
                    throw new IOException("invalid propertiesSize in folder");
                }
            }
        }
        assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams);
        assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams);
        stats.numberOfOutStreams += totalOutStreams;
        stats.numberOfInStreams += totalInStreams;

        if (totalOutStreams == 0) {
            throw new IOException("Total output streams can't be 0");
        }

        final int numBindPairs = assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1);
        if (totalInStreams < numBindPairs) {
            throw new IOException("Total input streams can't be less than the number of bind pairs");
        }
        final BitSet inStreamsBound = new BitSet((int) totalInStreams);
        for (int i = 0; i < numBindPairs; i++) {
            final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header));
            if (totalInStreams <= inIndex) {
                throw new IOException("inIndex is bigger than number of inStreams");
            }
            inStreamsBound.set(inIndex);
            final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header));
            if (totalOutStreams <= outIndex) {
                throw new IOException("outIndex is bigger than number of outStreams");
            }
        }

        final int numPackedStreams = assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs);

        if (numPackedStreams == 1) {
            // BitSet.nextClearBit never returns -1, so "no unbound in stream" shows up as an index at or beyond the number of in streams
            if (inStreamsBound.nextClearBit(0) >= totalInStreams) {
                throw new IOException("Couldn't find stream's bind pair index");
            }
        } else {
            for (int i = 0; i < numPackedStreams; i++) {
                final int packedStreamIndex = assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header));
                if (packedStreamIndex >= totalInStreams) {
                    throw new IOException("packedStreamIndex is bigger than number of totalInStreams");
                }
            }
        }

        return (int) totalOutStreams;
    }

    /**
     * Verifies the pack info section of the header, checking that the pack position and the pack sizes stay within the channel, and records the number
     * of packed streams in {@code stats}.
     *
     * @param header the buffer positioned at the start of the pack info data
     * @param stats  the statistics to update
     * @throws IOException if the section is malformed, truncated or not properly terminated
     */
    private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        final long packPos = readUint64(header);
        // the last comparison catches an overflow of the sum
        if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() || SIGNATURE_HEADER_SIZE + packPos < 0) {
            throw new IOException("packPos (" + packPos + ") is out of range");
        }
        final long numPackStreams = readUint64(header);
        stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams);
        int nid = getUnsignedByte(header);
        if (nid == NID.kSize) {
            long totalPackSizes = 0;
            for (int i = 0; i < stats.numberOfPackedStreams; i++) {
                final long packSize = readUint64(header);
                totalPackSizes += packSize;
                final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes;
                if (packSize < 0 || endOfPackStreams > channel.size() || endOfPackStreams < packPos) {
                    throw new IOException("packSize (" + packSize + ") is out of range");
                }
            }
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kCRC) {
            final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams).cardinality();
            // 4 bytes per defined CRC, computed as long so a huge count cannot overflow int and bypass the check
            final long crcsSize = 4L * crcsDefined;
            if (skipBytesFully(header, crcsSize) < crcsSize) {
                throw new IOException("invalid number of CRCs in PackInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated PackInfo (" + nid + ")");
        }
    }

    /**
     * Verifies the streams info section of the header, delegating to the checks for pack info, unpack info and sub streams info when the corresponding
     * property ids are present.
     *
     * @param header the buffer positioned at the start of the streams info data
     * @param stats  the statistics to update
     * @throws IOException if the section is malformed or not properly terminated
     */
    private void sanityCheckStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        int nid = getUnsignedByte(header);

        if (nid == NID.kPackInfo) {
            sanityCheckPackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kUnpackInfo) {
            sanityCheckUnpackInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kSubStreamsInfo) {
            sanityCheckSubStreamsInfo(header, stats);
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated StreamsInfo");
        }
    }

    /**
     * Verifies the sub streams info section of the header and records the total number of unpack sub streams in {@code stats}.
     *
     * @param header the buffer positioned at the start of the sub streams info data
     * @param stats  the statistics to update, its folder count and folder CRC flags are read
     * @throws IOException if the section is malformed, truncated or not properly terminated
     */
    private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {

        int nid = getUnsignedByte(header);
        final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>();
        if (nid == NID.kNumUnpackStream) {
            for (int i = 0; i < stats.numberOfFolders; i++) {
                numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header)));
            }
            stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum();
            nid = getUnsignedByte(header);
        } else {
            // one sub stream per folder
            stats.numberOfUnpackSubStreams = stats.numberOfFolders;
        }

        assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams);

        if (nid == NID.kSize) {
            for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) {
                if (numUnpackSubStreams == 0) {
                    continue;
                }
                // only the first n - 1 sizes of a folder are stored
                for (int i = 0; i < numUnpackSubStreams - 1; i++) {
                    final long size = readUint64(header);
                    if (size < 0) {
                        throw new IOException("negative unpackSize");
                    }
                }
            }
            nid = getUnsignedByte(header);
        }

        // digests are only stored for sub streams that are not already covered by their folder's CRC
        int numDigests = 0;
        if (numUnpackSubStreamsPerFolder.isEmpty()) {
            numDigests = stats.folderHasCrc == null ? stats.numberOfFolders : stats.numberOfFolders - stats.folderHasCrc.cardinality();
        } else {
            int folderIdx = 0;
            for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) {
                final boolean coveredByFolderCrc = numUnpackSubStreams == 1 && stats.folderHasCrc != null && stats.folderHasCrc.get(folderIdx);
                if (!coveredByFolderCrc) {
                    numDigests += numUnpackSubStreams;
                }
                // must advance for every folder, otherwise later folders are matched against the wrong CRC flag
                folderIdx++;
            }
        }

        if (nid == NID.kCRC) {
            assertFitsIntoNonNegativeInt("numDigests", numDigests);
            final int missingCrcs = readAllOrBits(header, numDigests).cardinality();
            // 4 bytes per defined CRC, computed as long so a huge count cannot overflow int and bypass the check
            final long missingCrcsSize = 4L * missingCrcs;
            if (skipBytesFully(header, missingCrcsSize) < missingCrcsSize) {
                throw new IOException("invalid number of missing CRCs in SubStreamInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated SubStreamsInfo");
        }
    }

    /**
     * Verifies the unpack info section of the header and records the number of folders and the folder CRC flags in {@code stats}.
     *
     * @param header the buffer positioned at the start of the unpack info data
     * @param stats  the statistics to update
     * @throws IOException              if the section is malformed, truncated, not properly terminated or uses unsupported features
     * @throws IllegalArgumentException if an unpack size is negative
     */
    private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException {
        int nid = getUnsignedByte(header);
        if (nid != NID.kFolder) {
            throw new IOException("Expected kFolder, got " + nid);
        }
        final long numFolders = readUint64(header);
        stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders);
        final int external = getUnsignedByte(header);
        if (external != 0) {
            throw new IOException("External unsupported");
        }

        final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>();
        for (int i = 0; i < stats.numberOfFolders; i++) {
            numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats));
        }

        final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders;
        final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs;
        if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) {
            throw new IOException("archive doesn't contain enough packed streams");
        }

        nid = getUnsignedByte(header);
        if (nid != NID.kCodersUnpackSize) {
            throw new IOException("Expected kCodersUnpackSize, got " + nid);
        }

        for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) {
            for (int i = 0; i < numberOfOutputStreams; i++) {
                final long unpackSize = readUint64(header);
                if (unpackSize < 0) {
                    // NOTE(review): the sibling checks throw IOException for the same condition; the type is kept for compatibility
                    throw new IllegalArgumentException("negative unpackSize");
                }
            }
        }

        nid = getUnsignedByte(header);
        if (nid == NID.kCRC) {
            stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders);
            final int crcsDefined = stats.folderHasCrc.cardinality();
            // 4 bytes per defined CRC, computed as long so a huge count cannot overflow int and bypass the check
            final long crcsSize = 4L * crcsDefined;
            if (skipBytesFully(header, crcsSize) < crcsSize) {
                throw new IOException("invalid number of CRCs in UnpackInfo");
            }
            nid = getUnsignedByte(header);
        }

        if (nid != NID.kEnd) {
            throw new IOException("Badly terminated UnpackInfo");
        }
    }

    /**
     * Skips all the entries if needed. Entries need to be skipped when:
     * <p>
     * 1. it's a random access 2. one of these 2 conditions is met:
     * </p>
     * <p>
     * 2.1 {@code currentEntryIndex != entryIndex}: this means there are some entries to be skipped ({@code currentEntryIndex < entryIndex}) or the entry
     * has already been read ({@code currentEntryIndex > entryIndex})
     * </p>
     * <p>
     * 2.2 {@code currentEntryIndex == entryIndex && hasCurrentEntryBeenRead}: if the entry to be read is the current entry, but some data of it has been
     * read before, then we need to reopen the stream of the folder and skip all the entries before the current entry
     * </p>
     *
     * @param entryIndex     the entry to be read
     * @param isInSameFolder are the entry to be read and the current entry in the same folder
     * @param folderIndex    the index of the folder which contains the entry
     * @return true if there are entries actually skipped
     * @throws IOException there are exceptions when skipping entries
     * @since 1.21
     */
    private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException {
        final SevenZArchiveEntry file = archive.files[entryIndex];
        // if the entry to be read is the current entry, and the entry has not
        // been read yet, then there's nothing we need to do
        if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) {
            return false;
        }

        // 1. if currentEntryIndex < entryIndex :
        // this means there are some entries to be skipped(currentEntryIndex < entryIndex)
        // 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) :
        // this means the entry has already been read before, and we need to reopen the
        // stream of the folder and skip all the entries before the current entries
        int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex];
        if (isInSameFolder) {
            if (currentEntryIndex < entryIndex) {
                // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped
                filesToSkipStartIndex = currentEntryIndex + 1;
            } else {
                // the entry is in the same folder of current entry, but it has already been read before, we need to reset
                // the position of the currentFolderInputStream to the beginning of folder, and then skip the files
                // from the start entry of the folder again
                reopenFolderInputStream(folderIndex, file);
            }
        }

        for (int i = filesToSkipStartIndex; i < entryIndex; i++) {
            final SevenZArchiveEntry fileToSkip = archive.files[i];
            InputStream fileStreamToSkip = BoundedInputStream.builder()
                    .setInputStream(currentFolderInputStream)
                    .setMaxCount(fileToSkip.getSize())
                    .setPropagateClose(false)
                    .get();
            if (fileToSkip.getHasCrc()) {
                // @formatter:off
                fileStreamToSkip = ChecksumInputStream.builder()
                        .setChecksum(new CRC32())
                        .setInputStream(fileStreamToSkip)
                        .setCountThreshold(fileToSkip.getSize())
                        .setExpectedChecksumValue(fileToSkip.getCrcValue())
                        .get();
                // @formatter:on
            }
            deferredBlockStreams.add(fileStreamToSkip);

            // set the content methods as well, it equals to file.getContentMethods() because they are in same folder
            fileToSkip.setContentMethods(file.getContentMethods());
        }
        return true;
    }

    /**
     * Returns the string representation of the underlying archive model.
     *
     * @return the result of {@code archive.toString()}
     */
    @Override
    public String toString() {
        return archive.toString();
    }

    /**
     * Tries to recover an archive whose start header is unusable by scanning backwards from the end of the channel - over at most 1 MiB and never before
     * the current channel position - for a byte that equals {@code NID.kEncodedHeader} or {@code NID.kHeader} and attempting to initialize the archive
     * from that position.
     *
     * @param password the password passed on to the archive initialization
     * @return the first archive found that has at least one pack size and one file
     * @throws IOException if reading the channel fails or no usable end header could be found
     */
    private Archive tryToLocateEndHeader(final byte[] password) throws IOException {
        final ByteBuffer nidBuf = ByteBuffer.allocate(1);
        final long searchLimit = 1024L * 1024 * 1;
        // Main header, plus bytes that readStartHeader would read
        final long previousDataSize = channel.position() + 20;
        final long minPos;
        // Determine minimal position - can't start before current position
        if (channel.position() + searchLimit > channel.size()) {
            minPos = channel.position();
        } else {
            minPos = channel.size() - searchLimit;
        }
        long pos = channel.size() - 1;
        // Loop: Try from end of archive
        while (pos > minPos) {
            pos--;
            channel.position(pos);
            nidBuf.rewind();
            if (channel.read(nidBuf) < 1) {
                throw new EOFException();
            }
            final int nid = nidBuf.array()[0];
            // First indicator: Byte equals one of these header identifiers
            if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                try {
                    // Try to initialize Archive structure from here
                    final long nextHeaderOffset = pos - previousDataSize;
                    final long nextHeaderSize = channel.size() - pos;
                    final StartHeader startHeader = new StartHeader(nextHeaderOffset, nextHeaderSize, 0);
                    final Archive result = initializeArchive(startHeader, password, false);
                    // Sanity check: There must be some data...
                    if (result.packSizes.length > 0 && result.files.length > 0) {
                        return result;
                    }
                } catch (final Exception ignored) {
                    // Wrong guess... deliberately best effort: keep scanning for the next candidate byte
                }
            }
        }
        throw new IOException("Start header corrupt and unable to guess end header");
    }
}