ArjArchiveInputStream.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.archivers.arj;

  18. import java.io.ByteArrayInputStream;
  19. import java.io.ByteArrayOutputStream;
  20. import java.io.DataInputStream;
  21. import java.io.EOFException;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.util.ArrayList;
  25. import java.util.zip.CRC32;

  26. import org.apache.commons.compress.archivers.ArchiveEntry;
  27. import org.apache.commons.compress.archivers.ArchiveException;
  28. import org.apache.commons.compress.archivers.ArchiveInputStream;
  29. import org.apache.commons.compress.utils.IOUtils;
  30. import org.apache.commons.io.input.BoundedInputStream;
  31. import org.apache.commons.io.input.ChecksumInputStream;

  32. /**
  33.  * Implements the "arj" archive format as an InputStream.
  34.  * <ul>
  35.  * <li><a href="https://github.com/FarGroup/FarManager/blob/master/plugins/multiarc/arc.doc/arj.txt">Reference 1</a></li>
  36.  * <li><a href="http://www.fileformat.info/format/arj/corion.htm">Reference 2</a></li>
  37.  * </ul>
  38.  *
  39.  * @NotThreadSafe
  40.  * @since 1.6
  41.  */
  42. public class ArjArchiveInputStream extends ArchiveInputStream<ArjArchiveEntry> {

  43.     private static final String ENCODING_NAME = "CP437";
  44.     private static final int ARJ_MAGIC_1 = 0x60;
  45.     private static final int ARJ_MAGIC_2 = 0xEA;

  46.     /**
  47.      * Checks if the signature matches what is expected for an arj file.
  48.      *
  49.      * @param signature the bytes to check
  50.      * @param length    the number of bytes to check
  51.      * @return true, if this stream is an arj archive stream, false otherwise
  52.      */
  53.     public static boolean matches(final byte[] signature, final int length) {
  54.         return length >= 2 && (0xff & signature[0]) == ARJ_MAGIC_1 && (0xff & signature[1]) == ARJ_MAGIC_2;
  55.     }

  56.     private final DataInputStream dis;
  57.     private final MainHeader mainHeader;
  58.     private LocalFileHeader currentLocalFileHeader;
  59.     private InputStream currentInputStream;

  60.     /**
  61.      * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in, and using the CP437 character encoding.
  62.      *
  63.      * @param inputStream the underlying stream, whose ownership is taken
  64.      * @throws ArchiveException if an exception occurs while reading
  65.      */
  66.     public ArjArchiveInputStream(final InputStream inputStream) throws ArchiveException {
  67.         this(inputStream, ENCODING_NAME);
  68.     }

  69.     /**
  70.      * Constructs the ArjInputStream, taking ownership of the inputStream that is passed in.
  71.      *
  72.      * @param inputStream the underlying stream, whose ownership is taken
  73.      * @param charsetName the charset used for file names and comments in the archive. May be {@code null} to use the platform default.
  74.      * @throws ArchiveException if an exception occurs while reading
  75.      */
  76.     public ArjArchiveInputStream(final InputStream inputStream, final String charsetName) throws ArchiveException {
  77.         super(inputStream, charsetName);
  78.         in = dis = new DataInputStream(inputStream);
  79.         try {
  80.             mainHeader = readMainHeader();
  81.             if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
  82.                 throw new ArchiveException("Encrypted ARJ files are unsupported");
  83.             }
  84.             if ((mainHeader.arjFlags & MainHeader.Flags.VOLUME) != 0) {
  85.                 throw new ArchiveException("Multi-volume ARJ files are unsupported");
  86.             }
  87.         } catch (final IOException ioException) {
  88.             throw new ArchiveException(ioException.getMessage(), ioException);
  89.         }
  90.     }

  91.     @Override
  92.     public boolean canReadEntryData(final ArchiveEntry ae) {
  93.         return ae instanceof ArjArchiveEntry && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
  94.     }

    /**
     * Closes this stream by closing the {@link DataInputStream} that wraps the input stream passed to the constructor.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        dis.close();
    }

    /**
     * Gets the archive's comment, as stored in the main header that was read when this stream was constructed.
     *
     * @return the archive's comment
     */
    public String getArchiveComment() {
        return mainHeader.comment;
    }

    /**
     * Gets the archive's recorded name, as stored in the main header that was read when this stream was constructed.
     *
     * @return the archive's name
     */
    public String getArchiveName() {
        return mainHeader.name;
    }

  115.     @Override
  116.     public ArjArchiveEntry getNextEntry() throws IOException {
  117.         if (currentInputStream != null) {
  118.             // return value ignored as IOUtils.skip ensures the stream is drained completely
  119.             final InputStream input = currentInputStream;
  120.             org.apache.commons.io.IOUtils.skip(input, Long.MAX_VALUE);
  121.             currentInputStream.close();
  122.             currentLocalFileHeader = null;
  123.             currentInputStream = null;
  124.         }

  125.         currentLocalFileHeader = readLocalFileHeader();
  126.         if (currentLocalFileHeader != null) {
  127.             // @formatter:off
  128.             currentInputStream = BoundedInputStream.builder()
  129.                     .setInputStream(dis)
  130.                     .setMaxCount(currentLocalFileHeader.compressedSize)
  131.                     .setPropagateClose(false)
  132.                     .get();
  133.             // @formatter:on
  134.             if (currentLocalFileHeader.method == LocalFileHeader.Methods.STORED) {
  135.                 // @formatter:off
  136.                 currentInputStream = ChecksumInputStream.builder()
  137.                         .setChecksum(new CRC32())
  138.                         .setInputStream(currentInputStream)
  139.                         .setCountThreshold(currentLocalFileHeader.originalSize)
  140.                         .setExpectedChecksumValue(currentLocalFileHeader.originalCrc32)
  141.                         .get();
  142.                 // @formatter:on
  143.             }
  144.             return new ArjArchiveEntry(currentLocalFileHeader);
  145.         }
  146.         currentInputStream = null;
  147.         return null;
  148.     }

  149.     @Override
  150.     public int read(final byte[] b, final int off, final int len) throws IOException {
  151.         if (len == 0) {
  152.             return 0;
  153.         }
  154.         if (currentLocalFileHeader == null) {
  155.             throw new IllegalStateException("No current arj entry");
  156.         }
  157.         if (currentLocalFileHeader.method != LocalFileHeader.Methods.STORED) {
  158.             throw new IOException("Unsupported compression method " + currentLocalFileHeader.method);
  159.         }
  160.         return currentInputStream.read(b, off, len);
  161.     }

  162.     private int read16(final DataInputStream dataIn) throws IOException {
  163.         final int value = dataIn.readUnsignedShort();
  164.         count(2);
  165.         return Integer.reverseBytes(value) >>> 16;
  166.     }

  167.     private int read32(final DataInputStream dataIn) throws IOException {
  168.         final int value = dataIn.readInt();
  169.         count(4);
  170.         return Integer.reverseBytes(value);
  171.     }

  172.     private int read8(final DataInputStream dataIn) throws IOException {
  173.         final int value = dataIn.readUnsignedByte();
  174.         count(1);
  175.         return value;
  176.     }

  177.     private void readExtraData(final int firstHeaderSize, final DataInputStream firstHeader, final LocalFileHeader localFileHeader) throws IOException {
  178.         if (firstHeaderSize >= 33) {
  179.             localFileHeader.extendedFilePosition = read32(firstHeader);
  180.             if (firstHeaderSize >= 45) {
  181.                 localFileHeader.dateTimeAccessed = read32(firstHeader);
  182.                 localFileHeader.dateTimeCreated = read32(firstHeader);
  183.                 localFileHeader.originalSizeEvenForVolumes = read32(firstHeader);
  184.                 pushedBackBytes(12);
  185.             }
  186.             pushedBackBytes(4);
  187.         }
  188.     }

  189.     private byte[] readHeader() throws IOException {
  190.         boolean found = false;
  191.         byte[] basicHeaderBytes = null;
  192.         do {
  193.             int first;
  194.             int second = read8(dis);
  195.             do {
  196.                 first = second;
  197.                 second = read8(dis);
  198.             } while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
  199.             final int basicHeaderSize = read16(dis);
  200.             if (basicHeaderSize == 0) {
  201.                 // end of archive
  202.                 return null;
  203.             }
  204.             if (basicHeaderSize <= 2600) {
  205.                 basicHeaderBytes = readRange(dis, basicHeaderSize);
  206.                 final long basicHeaderCrc32 = read32(dis) & 0xFFFFFFFFL;
  207.                 final CRC32 crc32 = new CRC32();
  208.                 crc32.update(basicHeaderBytes);
  209.                 if (basicHeaderCrc32 == crc32.getValue()) {
  210.                     found = true;
  211.                 }
  212.             }
  213.         } while (!found);
  214.         return basicHeaderBytes;
  215.     }

  216.     private LocalFileHeader readLocalFileHeader() throws IOException {
  217.         final byte[] basicHeaderBytes = readHeader();
  218.         if (basicHeaderBytes == null) {
  219.             return null;
  220.         }
  221.         try (DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes))) {

  222.             final int firstHeaderSize = basicHeader.readUnsignedByte();
  223.             final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
  224.             pushedBackBytes(firstHeaderBytes.length);
  225.             try (DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes))) {

  226.                 final LocalFileHeader localFileHeader = new LocalFileHeader();
  227.                 localFileHeader.archiverVersionNumber = firstHeader.readUnsignedByte();
  228.                 localFileHeader.minVersionToExtract = firstHeader.readUnsignedByte();
  229.                 localFileHeader.hostOS = firstHeader.readUnsignedByte();
  230.                 localFileHeader.arjFlags = firstHeader.readUnsignedByte();
  231.                 localFileHeader.method = firstHeader.readUnsignedByte();
  232.                 localFileHeader.fileType = firstHeader.readUnsignedByte();
  233.                 localFileHeader.reserved = firstHeader.readUnsignedByte();
  234.                 localFileHeader.dateTimeModified = read32(firstHeader);
  235.                 localFileHeader.compressedSize = 0xffffFFFFL & read32(firstHeader);
  236.                 localFileHeader.originalSize = 0xffffFFFFL & read32(firstHeader);
  237.                 localFileHeader.originalCrc32 = 0xffffFFFFL & read32(firstHeader);
  238.                 localFileHeader.fileSpecPosition = read16(firstHeader);
  239.                 localFileHeader.fileAccessMode = read16(firstHeader);
  240.                 pushedBackBytes(20);
  241.                 localFileHeader.firstChapter = firstHeader.readUnsignedByte();
  242.                 localFileHeader.lastChapter = firstHeader.readUnsignedByte();

  243.                 readExtraData(firstHeaderSize, firstHeader, localFileHeader);

  244.                 localFileHeader.name = readString(basicHeader);
  245.                 localFileHeader.comment = readString(basicHeader);

  246.                 final ArrayList<byte[]> extendedHeaders = new ArrayList<>();
  247.                 int extendedHeaderSize;
  248.                 while ((extendedHeaderSize = read16(dis)) > 0) {
  249.                     final byte[] extendedHeaderBytes = readRange(dis, extendedHeaderSize);
  250.                     final long extendedHeaderCrc32 = 0xffffFFFFL & read32(dis);
  251.                     final CRC32 crc32 = new CRC32();
  252.                     crc32.update(extendedHeaderBytes);
  253.                     if (extendedHeaderCrc32 != crc32.getValue()) {
  254.                         throw new IOException("Extended header CRC32 verification failure");
  255.                     }
  256.                     extendedHeaders.add(extendedHeaderBytes);
  257.                 }
  258.                 localFileHeader.extendedHeaders = extendedHeaders.toArray(new byte[0][]);

  259.                 return localFileHeader;
  260.             }
  261.         }
  262.     }

  263.     private MainHeader readMainHeader() throws IOException {
  264.         final byte[] basicHeaderBytes = readHeader();
  265.         if (basicHeaderBytes == null) {
  266.             throw new IOException("Archive ends without any headers");
  267.         }
  268.         final DataInputStream basicHeader = new DataInputStream(new ByteArrayInputStream(basicHeaderBytes));

  269.         final int firstHeaderSize = basicHeader.readUnsignedByte();
  270.         final byte[] firstHeaderBytes = readRange(basicHeader, firstHeaderSize - 1);
  271.         pushedBackBytes(firstHeaderBytes.length);

  272.         final DataInputStream firstHeader = new DataInputStream(new ByteArrayInputStream(firstHeaderBytes));

  273.         final MainHeader hdr = new MainHeader();
  274.         hdr.archiverVersionNumber = firstHeader.readUnsignedByte();
  275.         hdr.minVersionToExtract = firstHeader.readUnsignedByte();
  276.         hdr.hostOS = firstHeader.readUnsignedByte();
  277.         hdr.arjFlags = firstHeader.readUnsignedByte();
  278.         hdr.securityVersion = firstHeader.readUnsignedByte();
  279.         hdr.fileType = firstHeader.readUnsignedByte();
  280.         hdr.reserved = firstHeader.readUnsignedByte();
  281.         hdr.dateTimeCreated = read32(firstHeader);
  282.         hdr.dateTimeModified = read32(firstHeader);
  283.         hdr.archiveSize = 0xffffFFFFL & read32(firstHeader);
  284.         hdr.securityEnvelopeFilePosition = read32(firstHeader);
  285.         hdr.fileSpecPosition = read16(firstHeader);
  286.         hdr.securityEnvelopeLength = read16(firstHeader);
  287.         pushedBackBytes(20); // count has already counted them via readRange
  288.         hdr.encryptionVersion = firstHeader.readUnsignedByte();
  289.         hdr.lastChapter = firstHeader.readUnsignedByte();

  290.         if (firstHeaderSize >= 33) {
  291.             hdr.arjProtectionFactor = firstHeader.readUnsignedByte();
  292.             hdr.arjFlags2 = firstHeader.readUnsignedByte();
  293.             firstHeader.readUnsignedByte();
  294.             firstHeader.readUnsignedByte();
  295.         }

  296.         hdr.name = readString(basicHeader);
  297.         hdr.comment = readString(basicHeader);

  298.         final int extendedHeaderSize = read16(dis);
  299.         if (extendedHeaderSize > 0) {
  300.             hdr.extendedHeaderBytes = readRange(dis, extendedHeaderSize);
  301.             final long extendedHeaderCrc32 = 0xffffFFFFL & read32(dis);
  302.             final CRC32 crc32 = new CRC32();
  303.             crc32.update(hdr.extendedHeaderBytes);
  304.             if (extendedHeaderCrc32 != crc32.getValue()) {
  305.                 throw new IOException("Extended header CRC32 verification failure");
  306.             }
  307.         }

  308.         return hdr;
  309.     }

  310.     private byte[] readRange(final InputStream in, final int len) throws IOException {
  311.         final byte[] b = IOUtils.readRange(in, len);
  312.         count(b.length);
  313.         if (b.length < len) {
  314.             throw new EOFException();
  315.         }
  316.         return b;
  317.     }

  318.     private String readString(final DataInputStream dataIn) throws IOException {
  319.         try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
  320.             int nextByte;
  321.             while ((nextByte = dataIn.readUnsignedByte()) != 0) {
  322.                 buffer.write(nextByte);
  323.             }
  324.             return buffer.toString(getCharset().name());
  325.         }
  326.     }
  327. }