ArjArchiveInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  *   https://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.archivers.arj;

  20. import java.io.ByteArrayInputStream;
  21. import java.io.ByteArrayOutputStream;
  22. import java.io.DataInputStream;
  23. import java.io.EOFException;
  24. import java.io.IOException;
  25. import java.io.InputStream;
  26. import java.util.ArrayList;
  27. import java.util.zip.CRC32;

  28. import org.apache.commons.compress.archivers.ArchiveEntry;
  29. import org.apache.commons.compress.archivers.ArchiveException;
  30. import org.apache.commons.compress.archivers.ArchiveInputStream;
  31. import org.apache.commons.compress.utils.IOUtils;
  32. import org.apache.commons.io.input.BoundedInputStream;
  33. import org.apache.commons.io.input.ChecksumInputStream;

  34. /**
  35.  * Implements the "arj" archive format as an InputStream.
  36.  * <ul>
  37.  * <li><a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a></li>
  38.  * <li><a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a></li>
  39.  * </ul>
  40.  *
  41.  * @NotThreadSafe
  42.  * @since 1.6
  43.  */
  44. public class ArjArchiveInputStream extends ArchiveInputStream<ArjArchiveEntry> {

    /** Name of the charset that the single-argument constructor passes on for file names and comments. */
    private static final String ENCODING_NAME = "CP437";
    /** First signature byte; compared against {@code signature[0]} in {@link #matches(byte[], int)} and searched for when scanning for a header. */
    private static final int ARJ_MAGIC_1 = 0x60;
    /** Second signature byte; compared against {@code signature[1]} in {@link #matches(byte[], int)} and searched for when scanning for a header. */
    private static final int ARJ_MAGIC_2 = 0xEA;

  48.     /**
  49.      * Checks if the signature matches what is expected for an arj file.
  50.      *
  51.      * @param signature the bytes to check
  52.      * @param length    the number of bytes to check
  53.      * @return true, if this stream is an arj archive stream, false otherwise
  54.      */
  55.     public static boolean matches(final byte[] signature, final int length) {
  56.         return length >= 2 && (0xff & signature[0]) == ARJ_MAGIC_1 && (0xff & signature[1]) == ARJ_MAGIC_2;
  57.     }

    /** Data-oriented wrapper around the underlying stream; headers and entry data are read through it. */
    private final DataInputStream dis;
    /** Main archive header, read once by the constructor. */
    private final MainHeader mainHeader;
    /** Local file header of the entry most recently returned by {@link #getNextEntry()}, or {@code null} when there is none. */
    private LocalFileHeader currentLocalFileHeader;
    /** Stream over the current entry's data as set up by {@link #getNextEntry()}, or {@code null} when there is none. */
    private InputStream currentInputStream;

  62.     /**
  63.      * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in, and using the CP437 character encoding.
  64.      *
  65.      * @param inputStream the underlying stream, whose ownership is taken
  66.      * @throws ArchiveException if an exception occurs while reading
  67.      */
  68.     public ArjArchiveInputStream(final InputStream inputStream) throws ArchiveException {
  69.         this(inputStream, ENCODING_NAME);
  70.     }

  71.     /**
  72.      * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in.
  73.      *
  74.      * @param inputStream the underlying stream, whose ownership is taken
  75.      * @param charsetName the charset used for file names and comments in the archive. May be {@code null} to use the platform default.
  76.      * @throws ArchiveException if an exception occurs while reading
  77.      */
  78.     public ArjArchiveInputStream(final InputStream inputStream, final String charsetName) throws ArchiveException {
  79.         super(inputStream, charsetName);
  80.         in = dis = new DataInputStream(inputStream);
  81.         try {
  82.             mainHeader = readMainHeader();
  83.             if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
  84.                 throw new ArchiveException("Encrypted ARJ files are unsupported");
  85.             }
  86.             if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) {
  87.                 throw new ArchiveException("Multi-volume ARJ files are unsupported");
  88.             }
  89.         } catch (final IOException e) {
  90.             throw new ArchiveException(e.getMessage(), (Throwable) e);
  91.         }
  92.     }

  93.     @Override
  94.     public boolean canReadEntryData(final ArchiveEntry ae) {
  95.         return ae instanceof ArjArchiveEntry && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
  96.     }

  97.     @Override
  98.     public void close() throws IOException {
  99.         dis.close();
  100.     }

  101.     /**
  102.      * Gets the archive's comment.
  103.      *
  104.      * @return the archive's comment
  105.      */
  106.     public String getArchiveComment() {
  107.         return mainHeader.comment;
  108.     }

  109.     /**
  110.      * Gets the archive's recorded name.
  111.      *
  112.      * @return the archive's name
  113.      */
  114.     public String getArchiveName() {
  115.         return mainHeader.name;
  116.     }

  117.     @Override
  118.     public ArjArchiveEntry getNextEntry() throws IOException {
  119.         if (currentInputStream != null) {
  120.             // return value ignored as IOUtils.skip ensures the stream is drained completely
  121.             final InputStream input = currentInputStream;
  122.             org.apache.commons.io.IOUtils.skip(input, Long.MAX_VALUE);
  123.             currentInputStream.close();
  124.             currentLocalFileHeader = null;
  125.             currentInputStream = null;
  126.         }

  127.         currentLocalFileHeader = readLocalFileHeader();
  128.         if (currentLocalFileHeader != null) {
  129.             // @formatter:off
  130.             currentInputStream = BoundedInputStream.builder()
  131.                     .setInputStream(dis)
  132.                     .setMaxCount(currentLocalFileHeader.compressedSize)
  133.                     .setPropagateClose(false)
  134.                     .get();
  135.             // @formatter:on
  136.             if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) {
  137.                 // @formatter:off
  138.                 currentInputStream = ChecksumInputStream.builder()
  139.                         .setChecksum(new CRC32())
  140.                         .setInputStream(currentInputStream)
  141.                         .setCountThreshold(currentLocalFileHeader.originalSize)
  142.                         .setExpectedChecksumValue(currentLocalFileHeader.originalCrc32)
  143.                         .get();
  144.                 // @formatter:on
  145.             }
  146.             return new ArjArchiveEntry(currentLocalFileHeader);
  147.         }
  148.         currentInputStream = null;
  149.         return null;
  150.     }

  151.     @Override
  152.     public int read(final byte[] b, final int off, final int len) throws IOException {
  153.         if (len == 0) {
  154.             return 0;
  155.         }
  156.         if (currentLocalFileHeader == null) {
  157.             throw new IllegalStateException("No current arj entry");
  158.         }
  159.         if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) {
  160.             throw new IOException("Unsupported compression method " + currentLocalFileHeader.method);
  161.         }
  162.         return currentInputStream.read(b, off, len);
  163.     }

  164.     private int read16(final DataInputStream dataIn) throws IOException {
  165.         final int value = dataIn.readUnsignedShort();
  166.         count(2);
  167.         return Integer.reverseBytes(value) >>> 16;
  168.     }

  169.     private int read32(final DataInputStream dataIn) throws IOException {
  170.         final int value = dataIn.readInt();
  171.         count(4);
  172.         return Integer.reverseBytes(value);
  173.     }

  174.     private int read8(final DataInputStream dataIn) throws IOException {
  175.         final int value = dataIn.readUnsignedByte();
  176.         count(1);
  177.         return value;
  178.     }

  179.     private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader, final LocalFileHeader localFileHeader) throws IOException {
  180.         if (firstHeaderSize >= 33) {
  181.             localFileHeader.extendedFilePosition = read32(firstHeader);
  182.             if (firstHeaderSize >= 45) {
  183.                 localFileHeader.dateTimeAccessed = read32(firstHeader);
  184.                 localFileHeader.dateTimeCreated = read32(firstHeader);
  185.                 localFileHeader.originalSizeEvenForVolumes = read32(firstHeader);
  186.                 pushedBackBytes(12);
  187.             }
  188.             pushedBackBytes(4);
  189.         }
  190.     }

  191.     private byte[] readHeader() throws IOException {
  192.         boolean found = false;
  193.         byte[] basicHeaderBytes = null;
  194.         do {
  195.             int first;
  196.             int second = read8(dis);
  197.             do {
  198.                 first = second;
  199.                 second = read8(dis);
  200.             } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
  201.             final int basicHeaderSize = read16(dis);
  202.             if (basicHeaderSize == 0) {
  203.                 // end of archive
  204.                 return null;
  205.             }
  206.             if (basicHeaderSize <= 2600) {
  207.                 basicHeaderBytes = readRange(dis, basicHeaderSize);
  208.                 final long basicHeaderCrc32 = read32(dis) & 0xFFFFFFFFL;
  209.                 final CRC32 crc32 = new CRC32();
  210.                 crc32.update(basicHeaderBytes);
  211.                 if (basicHeaderCrc32 == crc32.getValue()) {
  212.                     found = true;
  213.                 }
  214.             }
  215.         } while (!found);
  216.         return basicHeaderBytes;
  217.     }

  218.     private LocalFileHeader readLocalFileHeader() throws IOException {
  219.         final byte[] basicHeaderBytes = readHeader();
  220.         if (basicHeaderBytes == null) {
  221.             return null;
  222.         }
  223.         try (DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) {

  224.             final int firstHeaderSize = basicHeader.readUnsignedByte();
  225.             final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
  226.             pushedBackBytes(firstHeaderBytes.length);
  227.             try (DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) {

  228.                 final LocalFileHeader localFileHeader = new LocalFileHeader();
  229.                 localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte();
  230.                 localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte();
  231.                 localFileHeader.hostOS = firstHeader.readUnsignedByte();
  232.                 localFileHeader.arjFlags = firstHeader.readUnsignedByte();
  233.                 localFileHeader.method = firstHeader.readUnsignedByte();
  234.                 localFileHeader.fileType = firstHeader.readUnsignedByte();
  235.                 localFileHeader.reserved = firstHeader.readUnsignedByte();
  236.                 localFileHeader.dateTimeModified = read32(firstHeader);
  237.                 localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader);
  238.                 localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader);
  239.                 localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader);
  240.                 localFileHeader.fileSpecPosition = read16(firstHeader);
  241.                 localFileHeader.fileAccessMode = read16(firstHeader);
  242.                 pushedBackBytes(20);
  243.                 localFileHeader.firstChapter = firstHeader.readUnsignedByte();
  244.                 localFileHeader.lastChapter = firstHeader.readUnsignedByte();

  245.                 readExtraData(firstHeaderSize, firstHeader, localFileHeader);

  246.                 localFileHeader.name = readString(basicHeader);
  247.                 localFileHeader.comment = readString(basicHeader);

  248.                 final ArrayList<byte[]> extendedHeaders = new ArrayList<>();
  249.                 int extendedHeaderSize;
  250.                 while ((extendedHeaderSize = read16(dis)) > 0) {
  251.                     final byte[] extendedHeaderBytes = readRange(dis, extendedHeaderSize);
  252.                     final long extendedHeaderCrc32 = 0xffffFFFFL & read32(dis);
  253.                     final CRC32 crc32 = new CRC32();
  254.                     crc32.update(extendedHeaderBytes);
  255.                     if (extendedHeaderCrc32 != crc32.getValue()) {
  256.                         throw new IOException("Extended header CRC32 verification failure");
  257.                     }
  258.                     extendedHeaders.add(extendedHeaderBytes);
  259.                 }
  260.                 localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]);

  261.                 return localFileHeader;
  262.             }
  263.         }
  264.     }

  265.     private MainHeader readMainHeader() throws IOException {
  266.         final byte[] basicHeaderBytes = readHeader();
  267.         if (basicHeaderBytes == null) {
  268.             throw new IOException("Archive ends without any headers");
  269.         }
  270.         final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes));

  271.         final int firstHeaderSize = basicHeader.readUnsignedByte();
  272.         final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
  273.         pushedBackBytes(firstHeaderBytes.length);

  274.         final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes));

  275.         final MainHeader header = new MainHeader();
  276.         header.archiverVersionNumber = firstHeader.readUnsignedByte();
  277.         header.minVersionToExtract = firstHeader.readUnsignedByte();
  278.         header.hostOS = firstHeader.readUnsignedByte();
  279.         header.arjFlags = firstHeader.readUnsignedByte();
  280.         header.securityVersion = firstHeader.readUnsignedByte();
  281.         header.fileType = firstHeader.readUnsignedByte();
  282.         header.reserved = firstHeader.readUnsignedByte();
  283.         header.dateTimeCreated = read32(firstHeader);
  284.         header.dateTimeModified = read32(firstHeader);
  285.         header.archiveSize = 0xffffFFFFL & read32(firstHeader);
  286.         header.securityEnvelopeFilePosition = read32(firstHeader);
  287.         header.fileSpecPosition = read16(firstHeader);
  288.         header.securityEnvelopeLength = read16(firstHeader);
  289.         pushedBackBytes(20); // count has already counted them via readRange
  290.         header.encryptionVersion = firstHeader.readUnsignedByte();
  291.         header.lastChapter = firstHeader.readUnsignedByte();

  292.         if (firstHeaderSize >= 33) {
  293.             header.arjProtectionFactor = firstHeader.readUnsignedByte();
  294.             header.arjFlags2 = firstHeader.readUnsignedByte();
  295.             firstHeader.readUnsignedByte();
  296.             firstHeader.readUnsignedByte();
  297.         }

  298.         header.name = readString(basicHeader);
  299.         header.comment = readString(basicHeader);

  300.         final int extendedHeaderSize = read16(dis);
  301.         if (extendedHeaderSize > 0) {
  302.             header.extendedHeaderBytes = readRange(dis, extendedHeaderSize);
  303.             final long extendedHeaderCrc32 = 0xffffFFFFL & read32(dis);
  304.             final CRC32 crc32 = new CRC32();
  305.             crc32.update(header.extendedHeaderBytes);
  306.             if (extendedHeaderCrc32 != crc32.getValue()) {
  307.                 throw new IOException("Extended header CRC32 verification failure");
  308.             }
  309.         }

  310.         return header;
  311.     }

  312.     private byte[] readRange(final InputStream in, final int len) throws IOException {
  313.         final byte[] b = IOUtils.readRange(in, len);
  314.         count(b.length);
  315.         if (b.length < len) {
  316.             throw new EOFException();
  317.         }
  318.         return b;
  319.     }

  320.     private String readString(final DataInputStream dataIn) throws IOException {
  321.         try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
  322.             int nextByte;
  323.             while ((nextByte = dataIn.readUnsignedByte()) != 0) {
  324.                 buffer.write(nextByte);
  325.             }
  326.             return buffer.toString(getCharset().name());
  327.         }
  328.     }
  329. }