ArArchiveInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.archivers.ar;

  20. import java.io.EOFException;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.nio.charset.StandardCharsets;
  24. import java.util.Arrays;
  25. import java.util.regex.Pattern;

  26. import org.apache.commons.compress.archivers.ArchiveInputStream;
  27. import org.apache.commons.compress.utils.ArchiveUtils;
  28. import org.apache.commons.compress.utils.IOUtils;
  29. import org.apache.commons.compress.utils.ParsingUtils;

  30. /**
  31.  * Implements the "ar" archive format as an input stream.
  32.  *
  33.  * @NotThreadSafe
  34.  */
  35. public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {

  // Offsets and lengths (in bytes) of the fixed-width fields of an entry header.
  // Every offset is the previous field's offset plus the previous field's length,
  // so the fields are laid out back to back in the order declared here.

  /** Offset of the entry name field within the header. */
  private static final int NAME_OFFSET = 0;
  /** Length of the entry name field. */
  private static final int NAME_LEN = 16;
  /** Offset of the last-modified field; it directly follows the name field. */
  private static final int LAST_MODIFIED_OFFSET = NAME_LEN;

  /** Length of the last-modified field. */
  private static final int LAST_MODIFIED_LEN = 12;

  /** Offset of the user id field. */
  private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;

  /** Length of the user id field. */
  private static final int USER_ID_LEN = 6;

  /** Offset of the group id field. */
  private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
  /** Length of the group id field. */
  private static final int GROUP_ID_LEN = 6;
  /** Offset of the file mode field (parsed with base 8 by getNextArEntry). */
  private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
  /** Length of the file mode field. */
  private static final int FILE_MODE_LEN = 8;
  /** Offset of the entry length (ar_size) field. */
  private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
  /** Length of the entry length (ar_size) field. */
  private static final int LENGTH_LEN = 10;
  /** Prefix that marks a BSD style long name in the name field; package-private. */
  static final String BSD_LONGNAME_PREFIX = "#1/";
  /** Number of characters to skip in order to reach the digits that follow the BSD prefix. */
  private static final int BSD_LONGNAME_PREFIX_LEN = BSD_LONGNAME_PREFIX.length();
  /** Matches the BSD prefix followed by one or more decimal digits (used with a full match in isBSDLongName). */
  private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
  /** Name of the member that holds the GNU table of long file names. */
  private static final String GNU_STRING_TABLE_NAME = "//";
  /** Matches "/" followed by one or more decimal digits (used with a full match in isGNULongName). */
  private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");

  54.     /**
  55.      * Does the name look like it is a long name (or a name containing spaces) as encoded by BSD ar?
  56.      * <p>
  57.      * From the FreeBSD ar(5) man page:
  58.      * </p>
  59.      * <pre>
  60.      * BSD   In the BSD variant, names that are shorter than 16
  61.      *       characters and without embedded spaces are stored
  62.      *       directly in this field.  If a name has an embedded
  63.      *       space, or if it is longer than 16 characters, then
  64.      *       the string "#1/" followed by the decimal represen-
  65.      *       tation of the length of the file name is placed in
  66.      *       this field. The actual file name is stored immedi-
  67.      *       ately after the archive header.  The content of the
  68.      *       archive member follows the file name.  The ar_size
  69.      *       field of the header (see below) will then hold the
  70.      *       sum of the size of the file name and the size of
  71.      *       the member.
  72.      * </pre>
  73.      *
  74.      * @since 1.3
  75.      */
  76.     private static boolean isBSDLongName(final String name) {
  77.         return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches();
  78.     }

  79.     /**
  80.      * Is this the name of the "Archive String Table" as used by SVR4/GNU to store long file names?
  81.      * <p>
  82.      * GNU ar stores multiple extended file names in the data section of a file with the name "//", this record is referred to by future headers.
  83.      * </p>
  84.      * <p>
  85.      * A header references an extended file name by storing a "/" followed by a decimal offset to the start of the file name in the extended file name data
  86.      * section.
  87.      * </p>
  88.      * <p>
  89.      * The format of the "//" file itself is simply a list of the long file names, each separated by one or more LF characters. Note that the decimal offsets
  90.      * are number of characters, not line or string number within the "//" file.
  91.      * </p>
  92.      */
  93.     private static boolean isGNUStringTable(final String name) {
  94.         return GNU_STRING_TABLE_NAME.equals(name);
  95.     }

  96.     /**
  97.      * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF control character
  98.      *
  99.      * @param signature the bytes to check
  100.      * @param length    the number of bytes to check
  101.      * @return true, if this stream is an Ar archive stream, false otherwise
  102.      */
  103.     public static boolean matches(final byte[] signature, final int length) {
  104.         // 3c21 7261 6863 0a3e
  105.         // @formatter:off
  106.         return length >= 8 &&
  107.                 signature[0] == 0x21 &&
  108.                 signature[1] == 0x3c &&
  109.                 signature[2] == 0x61 &&
  110.                 signature[3] == 0x72 &&
  111.                 signature[4] == 0x63 &&
  112.                 signature[5] == 0x68 &&
  113.                 signature[6] == 0x3e &&
  114.                 signature[7] == 0x0a;
  115.         // @formatter:on
  116.     }

  /** Running count of bytes consumed from the underlying stream; trackReadBytes adds every positive read count to it. */
  private long offset;

  /** Set to {@code true} by close() so that the underlying stream is closed at most once. */
  private boolean closed;

  /*
   * If getNextEntry has been called, the entry metadata is stored in currentEntry.
   * It is reset to null by close() and while getNextArEntry moves on to the next header.
   */
  private ArArchiveEntry currentEntry;

  /** Storage area for extra long names (GNU ar); filled by readGNUStringTable and consulted by getExtendedName. */
  private byte[] namebuffer;

  /**
   * The value of {@link #offset} at which the data of the current entry starts. -1 if no entry has been read yet.
   */
  private long entryOffset = -1;

  /** Cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection). Sized as the sum of all header field lengths. */
  private final byte[] metaData = new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];

  /**
   * Constructs an Ar input stream that reads from the referenced stream.
   * <p>
   * The name of the US-ASCII charset is passed on to the superclass constructor.
   * </p>
   *
   * @param inputStream the ar input stream
   */
  public ArArchiveInputStream(final InputStream inputStream) {
    super(inputStream, StandardCharsets.US_ASCII.name());
  }

  139.     private int asInt(final byte[] byteArray, final int offset, final int len) throws IOException {
  140.         return asInt(byteArray, offset, len, 10, false);
  141.     }

  142.     private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) throws IOException {
  143.         return asInt(byteArray, offset, len, 10, treatBlankAsZero);
  144.     }

  145.     private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException {
  146.         return asInt(byteArray, offset, len, base, false);
  147.     }

  148.     private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) throws IOException {
  149.         final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
  150.         if (string.isEmpty() && treatBlankAsZero) {
  151.             return 0;
  152.         }
  153.         return ParsingUtils.parseIntValue(string, base);
  154.     }

  155.     private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException {
  156.         return ParsingUtils.parseLongValue(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
  157.     }

  158.     /*
  159.      * (non-Javadoc)
  160.      *
  161.      * @see java.io.InputStream#close()
  162.      */
  163.     @Override
  164.     public void close() throws IOException {
  165.         if (!closed) {
  166.             closed = true;
  167.             in.close();
  168.         }
  169.         currentEntry = null;
  170.     }

  171.     /**
  172.      * Reads the real name from the current stream assuming the very first bytes to be read are the real file name.
  173.      *
  174.      * @see #isBSDLongName
  175.      *
  176.      * @since 1.3
  177.      */
  178.     private String getBSDLongName(final String bsdLongName) throws IOException {
  179.         final int nameLen = ParsingUtils.parseIntValue(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
  180.         final byte[] name = IOUtils.readRange(in, nameLen);
  181.         final int read = name.length;
  182.         trackReadBytes(read);
  183.         if (read != nameLen) {
  184.             throw new EOFException();
  185.         }
  186.         return ArchiveUtils.toAsciiString(name);
  187.     }

  188.     /**
  189.      * Gets an extended name from the GNU extended name buffer.
  190.      *
  191.      * @param offset pointer to entry within the buffer
  192.      * @return the extended file name; without trailing "/" if present.
  193.      * @throws IOException if name not found or buffer not set up
  194.      */
  195.     private String getExtendedName(final int offset) throws IOException {
  196.         if (namebuffer == null) {
  197.             throw new IOException("Cannot process GNU long file name as no // record was found");
  198.         }
  199.         for (int i = offset; i < namebuffer.length; i++) {
  200.             if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
  201.                 // Avoid array errors
  202.                 if (i == 0) {
  203.                     break;
  204.                 }
  205.                 if (namebuffer[i - 1] == '/') {
  206.                     i--; // drop trailing /
  207.                 }
  208.                 // Check there is a something to return, otherwise break out of the loop
  209.                 if (i - offset > 0) {
  210.                     return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
  211.                 }
  212.                 break;
  213.             }
  214.         }
  215.         throw new IOException("Failed to read entry: " + offset);
  216.     }

  217.     /**
  218.      * Returns the next AR entry in this stream.
  219.      *
  220.      * @return the next AR entry.
  221.      * @throws IOException if the entry could not be read
  222.      * @deprecated Use {@link #getNextEntry()}.
  223.      */
  224.     @Deprecated
  225.     public ArArchiveEntry getNextArEntry() throws IOException {
  226.         if (currentEntry != null) {
  227.             final long entryEnd = entryOffset + currentEntry.getLength();
  228.             final long skipped = org.apache.commons.io.IOUtils.skip(in, entryEnd - offset);
  229.             trackReadBytes(skipped);
  230.             currentEntry = null;
  231.         }
  232.         if (offset == 0) {
  233.             final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
  234.             final byte[] realized = IOUtils.readRange(in, expected.length);
  235.             final int read = realized.length;
  236.             trackReadBytes(read);
  237.             if (read != expected.length) {
  238.                 throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
  239.             }
  240.             if (!Arrays.equals(expected, realized)) {
  241.                 throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
  242.             }
  243.         }
  244.         if (offset % 2 != 0) {
  245.             if (in.read() < 0) {
  246.                 // hit eof
  247.                 return null;
  248.             }
  249.             trackReadBytes(1);
  250.         }
  251.         {
  252.             final int read = IOUtils.readFully(in, metaData);
  253.             trackReadBytes(read);
  254.             if (read == 0) {
  255.                 return null;
  256.             }
  257.             if (read < metaData.length) {
  258.                 throw new IOException("Truncated ar archive");
  259.             }
  260.         }
  261.         {
  262.             final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
  263.             final byte[] realized = IOUtils.readRange(in, expected.length);
  264.             final int read = realized.length;
  265.             trackReadBytes(read);
  266.             if (read != expected.length) {
  267.                 throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
  268.             }
  269.             if (!Arrays.equals(expected, realized)) {
  270.                 throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
  271.             }
  272.         }

  273.         entryOffset = offset;
  274.         // GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of an extended file name.
  275.         // entry name is stored as ASCII string
  276.         String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
  277.         if (isGNUStringTable(temp)) { // GNU extended file names entry
  278.             currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
  279.             return getNextArEntry();
  280.         }
  281.         long len;
  282.         try {
  283.             len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
  284.         } catch (final NumberFormatException ex) {
  285.             throw new IOException("Broken archive, unable to parse ar_size field as a number", ex);
  286.         }
  287.         if (temp.endsWith("/")) { // GNU terminator
  288.             temp = temp.substring(0, temp.length() - 1);
  289.         } else if (isGNULongName(temp)) {
  290.             final int off = ParsingUtils.parseIntValue(temp.substring(1)); // get the offset
  291.             temp = getExtendedName(off); // convert to the long name
  292.         } else if (isBSDLongName(temp)) {
  293.             temp = getBSDLongName(temp);
  294.             // entry length contained the length of the file name in
  295.             // addition to the real length of the entry.
  296.             // assume file name was ASCII, there is no "standard" otherwise
  297.             final int nameLen = temp.length();
  298.             len -= nameLen;
  299.             entryOffset += nameLen;
  300.         }
  301.         if (len < 0) {
  302.             throw new IOException("broken archive, entry with negative size");
  303.         }
  304.         try {
  305.             currentEntry = new ArArchiveEntry(temp, len, asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
  306.                     asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
  307.                     asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
  308.             return currentEntry;
  309.         } catch (final NumberFormatException ex) {
  310.             throw new IOException("Broken archive, unable to parse entry metadata fields as numbers", ex);
  311.         }
  312.     }

  /**
   * Returns the next entry of the archive by delegating to {@link #getNextArEntry()}.
   *
   * @return whatever {@link #getNextArEntry()} returns
   * @throws IOException if {@link #getNextArEntry()} throws it
   * @see org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
   */
  @Override
  public ArArchiveEntry getNextEntry() throws IOException {
    return getNextArEntry();
  }

  322.     /**
  323.      * Does the name look like it is a long name (or a name containing spaces) as encoded by SVR4/GNU ar?
  324.      *
  325.      * @see #isGNUStringTable
  326.      */
  327.     private boolean isGNULongName(final String name) {
  328.         return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
  329.     }

  330.     /*
  331.      * (non-Javadoc)
  332.      *
  333.      * @see java.io.InputStream#read(byte[], int, int)
  334.      */
  335.     @Override
  336.     public int read(final byte[] b, final int off, final int len) throws IOException {
  337.         if (len == 0) {
  338.             return 0;
  339.         }
  340.         if (currentEntry == null) {
  341.             throw new IllegalStateException("No current ar entry");
  342.         }
  343.         final long entryEnd = entryOffset + currentEntry.getLength();
  344.         if (len < 0 || offset >= entryEnd) {
  345.             return -1;
  346.         }
  347.         final int toRead = (int) Math.min(len, entryEnd - offset);
  348.         final int ret = this.in.read(b, off, toRead);
  349.         trackReadBytes(ret);
  350.         return ret;
  351.     }

  352.     /**
  353.      * Reads the GNU archive String Table.
  354.      *
  355.      * @see #isGNUStringTable
  356.      */
  357.     private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
  358.         int bufflen;
  359.         try {
  360.             bufflen = asInt(length, offset, len); // Assume length will fit in an int
  361.         } catch (final NumberFormatException ex) {
  362.             throw new IOException("Broken archive, unable to parse GNU string table length field as a number", ex);
  363.         }
  364.         namebuffer = IOUtils.readRange(in, bufflen);
  365.         final int read = namebuffer.length;
  366.         trackReadBytes(read);
  367.         if (read != bufflen) {
  368.             throw new IOException("Failed to read complete // record: expected=" + bufflen + " read=" + read);
  369.         }
  370.         return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
  371.     }

  372.     private void trackReadBytes(final long read) {
  373.         count(read);
  374.         if (read > 0) {
  375.             offset += read;
  376.         }
  377.     }
  378. }