View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.ar;
20  
21  import java.io.EOFException;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.charset.StandardCharsets;
25  import java.util.Arrays;
26  import java.util.regex.Pattern;
27  
28  import org.apache.commons.compress.archivers.ArchiveInputStream;
29  import org.apache.commons.compress.utils.ArchiveUtils;
30  import org.apache.commons.compress.utils.IOUtils;
31  import org.apache.commons.compress.utils.ParsingUtils;
32  
33  /**
34   * Implements the "ar" archive format as an input stream.
35   *
36   * @NotThreadSafe
37   */
38  public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {
39  
40      // offsets and length of meta data parts
41      private static final int NAME_OFFSET = 0;
42      private static final int NAME_LEN = 16;
43      private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
44  
45      private static final int LAST_MODIFIED_LEN = 12;
46  
47      private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
48  
49      private static final int USER_ID_LEN = 6;
50  
51      private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
52      private static final int GROUP_ID_LEN = 6;
53      private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
54      private static final int FILE_MODE_LEN = 8;
55      private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
56      private static final int LENGTH_LEN = 10;
57      static final String BSD_LONGNAME_PREFIX = "#1/";
58      private static final int BSD_LONGNAME_PREFIX_LEN = BSD_LONGNAME_PREFIX.length();
59      private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
60      private static final String GNU_STRING_TABLE_NAME = "//";
61      private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");
62  
63      /**
64       * Does the name look like it is a long name (or a name containing spaces) as encoded by BSD ar?
65       * <p>
66       * From the FreeBSD ar(5) man page:
67       * </p>
68       * <pre>
69       * BSD   In the BSD variant, names that are shorter than 16
70       *       characters and without embedded spaces are stored
71       *       directly in this field.  If a name has an embedded
72       *       space, or if it is longer than 16 characters, then
73       *       the string "#1/" followed by the decimal represen-
74       *       tation of the length of the file name is placed in
75       *       this field. The actual file name is stored immedi-
76       *       ately after the archive header.  The content of the
77       *       archive member follows the file name.  The ar_size
78       *       field of the header (see below) will then hold the
79       *       sum of the size of the file name and the size of
80       *       the member.
81       * </pre>
82       *
83       * @since 1.3
84       */
85      private static boolean isBSDLongName(final String name) {
86          return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches();
87      }
88  
89      /**
90       * Is this the name of the "Archive String Table" as used by SVR4/GNU to store long file names?
91       * <p>
92       * GNU ar stores multiple extended file names in the data section of a file with the name "//", this record is referred to by future headers.
93       * </p>
94       * <p>
95       * A header references an extended file name by storing a "/" followed by a decimal offset to the start of the file name in the extended file name data
96       * section.
97       * </p>
98       * <p>
99       * The format of the "//" file itself is simply a list of the long file names, each separated by one or more LF characters. Note that the decimal offsets
100      * are number of characters, not line or string number within the "//" file.
101      * </p>
102      */
103     private static boolean isGNUStringTable(final String name) {
104         return GNU_STRING_TABLE_NAME.equals(name);
105     }
106 
107     /**
108      * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF control character
109      *
110      * @param signature the bytes to check
111      * @param length    the number of bytes to check
112      * @return true, if this stream is an Ar archive stream, false otherwise
113      */
114     public static boolean matches(final byte[] signature, final int length) {
115         // 3c21 7261 6863 0a3e
116         // @formatter:off
117         return length >= 8 &&
118                 signature[0] == 0x21 &&
119                 signature[1] == 0x3c &&
120                 signature[2] == 0x61 &&
121                 signature[3] == 0x72 &&
122                 signature[4] == 0x63 &&
123                 signature[5] == 0x68 &&
124                 signature[6] == 0x3e &&
125                 signature[7] == 0x0a;
126         // @formatter:on
127     }
128 
129     private long offset;
130 
131     private boolean closed;
132 
133     /*
134      * If getNextEntry has been called, the entry metadata is stored in currentEntry.
135      */
136     private ArArchiveEntry currentEntry;
137 
138     /** Storage area for extra long names (GNU ar). */
139     private byte[] namebuffer;
140 
141     /**
142      * The offset where the current entry started. -1 if no entry has been called
143      */
144     private long entryOffset = -1;
145 
146     /** Cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection). */
147     private final byte[] metaData = new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
148 
149     /**
150      * Constructs an Ar input stream with the referenced stream
151      *
152      * @param inputStream the ar input stream
153      */
154     public ArArchiveInputStream(final InputStream inputStream) {
155         super(inputStream, StandardCharsets.US_ASCII.name());
156     }
157 
158     private int asInt(final byte[] byteArray, final int offset, final int len) throws IOException {
159         return asInt(byteArray, offset, len, 10, false);
160     }
161 
162     private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) throws IOException {
163         return asInt(byteArray, offset, len, 10, treatBlankAsZero);
164     }
165 
166     private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException {
167         return asInt(byteArray, offset, len, base, false);
168     }
169 
170     private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) throws IOException {
171         final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
172         if (string.isEmpty() && treatBlankAsZero) {
173             return 0;
174         }
175         return ParsingUtils.parseIntValue(string, base);
176     }
177 
178     private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException {
179         return ParsingUtils.parseLongValue(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
180     }
181 
182     /*
183      * (non-Javadoc)
184      *
185      * @see java.io.InputStream#close()
186      */
187     @Override
188     public void close() throws IOException {
189         if (!closed) {
190             closed = true;
191             in.close();
192         }
193         currentEntry = null;
194     }
195 
196     /**
197      * Reads the real name from the current stream assuming the very first bytes to be read are the real file name.
198      *
199      * @see #isBSDLongName
200      * @since 1.3
201      */
202     private String getBSDLongName(final String bsdLongName) throws IOException {
203         final int nameLen = ParsingUtils.parseIntValue(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
204         final byte[] name = IOUtils.readRange(in, nameLen);
205         final int read = name.length;
206         trackReadBytes(read);
207         if (read != nameLen) {
208             throw new EOFException(bsdLongName);
209         }
210         return ArchiveUtils.toAsciiString(name);
211     }
212 
213     /**
214      * Gets an extended name from the GNU extended name buffer.
215      *
216      * @param offset pointer to entry within the buffer
217      * @return the extended file name; without trailing "/" if present.
218      * @throws IOException if name not found or buffer not set up
219      */
220     private String getExtendedName(final int offset) throws IOException {
221         if (namebuffer == null) {
222             throw new IOException("Cannot process GNU long file name as no // record was found");
223         }
224         for (int i = offset; i < namebuffer.length; i++) {
225             if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
226                 // Avoid array errors
227                 if (i == 0) {
228                     break;
229                 }
230                 if (namebuffer[i - 1] == '/') {
231                     i--; // drop trailing /
232                 }
233                 // Check there is a something to return, otherwise break out of the loop
234                 if (i - offset > 0) {
235                     return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
236                 }
237                 break;
238             }
239         }
240         throw new IOException("Failed to read entry: " + offset);
241     }
242 
243     /**
244      * Returns the next AR entry in this stream.
245      *
246      * @return the next AR entry.
247      * @throws IOException if the entry could not be read
248      * @deprecated Use {@link #getNextEntry()}.
249      */
250     @Deprecated
251     public ArArchiveEntry getNextArEntry() throws IOException {
252         if (currentEntry != null) {
253             final long entryEnd = entryOffset + currentEntry.getLength();
254             final long skipped = org.apache.commons.io.IOUtils.skip(in, entryEnd - offset);
255             trackReadBytes(skipped);
256             currentEntry = null;
257         }
258         if (offset == 0) {
259             final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
260             final byte[] realized = IOUtils.readRange(in, expected.length);
261             final int read = realized.length;
262             trackReadBytes(read);
263             if (read != expected.length) {
264                 throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
265             }
266             if (!Arrays.equals(expected, realized)) {
267                 throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
268             }
269         }
270         if (offset % 2 != 0) {
271             if (in.read() < 0) {
272                 // hit eof
273                 return null;
274             }
275             trackReadBytes(1);
276         }
277         {
278             final int read = IOUtils.readFully(in, metaData);
279             trackReadBytes(read);
280             if (read == 0) {
281                 return null;
282             }
283             if (read < metaData.length) {
284                 throw new IOException("Truncated ar archive");
285             }
286         }
287         {
288             final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
289             final byte[] realized = IOUtils.readRange(in, expected.length);
290             final int read = realized.length;
291             trackReadBytes(read);
292             if (read != expected.length) {
293                 throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
294             }
295             if (!Arrays.equals(expected, realized)) {
296                 throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
297             }
298         }
299 
300         entryOffset = offset;
301         // GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of an extended file name.
302         // entry name is stored as ASCII string
303         String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
304         if (isGNUStringTable(temp)) { // GNU extended file names entry
305             currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
306             return getNextArEntry();
307         }
308         long len;
309         try {
310             len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
311         } catch (final NumberFormatException ex) {
312             throw new IOException("Broken archive, unable to parse ar_size field as a number", ex);
313         }
314         if (temp.endsWith("/")) { // GNU terminator
315             temp = temp.substring(0, temp.length() - 1);
316         } else if (isGNULongName(temp)) {
317             final int off = ParsingUtils.parseIntValue(temp.substring(1)); // get the offset
318             temp = getExtendedName(off); // convert to the long name
319         } else if (isBSDLongName(temp)) {
320             temp = getBSDLongName(temp);
321             // entry length contained the length of the file name in
322             // addition to the real length of the entry.
323             // assume file name was ASCII, there is no "standard" otherwise
324             final int nameLen = temp.length();
325             len -= nameLen;
326             entryOffset += nameLen;
327         }
328         if (len < 0) {
329             throw new IOException("broken archive, entry with negative size");
330         }
331         try {
332             currentEntry = new ArArchiveEntry(temp, len, asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
333                     asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
334                     asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
335             return currentEntry;
336         } catch (final NumberFormatException ex) {
337             throw new IOException("Broken archive, unable to parse entry metadata fields as numbers", ex);
338         }
339     }
340 
341     /*
342      * (non-Javadoc)
343      *
344      * @see org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
345      */
346     @Override
347     public ArArchiveEntry getNextEntry() throws IOException {
348         return getNextArEntry();
349     }
350 
351     /**
352      * Does the name look like it is a long name (or a name containing spaces) as encoded by SVR4/GNU ar?
353      *
354      * @see #isGNUStringTable
355      */
356     private boolean isGNULongName(final String name) {
357         return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
358     }
359 
360     /*
361      * (non-Javadoc)
362      *
363      * @see java.io.InputStream#read(byte[], int, int)
364      */
365     @Override
366     public int read(final byte[] b, final int off, final int len) throws IOException {
367         if (len == 0) {
368             return 0;
369         }
370         if (currentEntry == null) {
371             throw new IllegalStateException("No current ar entry");
372         }
373         final long entryEnd = entryOffset + currentEntry.getLength();
374         if (len < 0 || offset >= entryEnd) {
375             return -1;
376         }
377         final int toRead = (int) Math.min(len, entryEnd - offset);
378         final int ret = this.in.read(b, off, toRead);
379         trackReadBytes(ret);
380         return ret;
381     }
382 
383     /**
384      * Reads the GNU archive String Table.
385      *
386      * @see #isGNUStringTable
387      */
388     private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
389         final int bufflen;
390         try {
391             bufflen = asInt(length, offset, len); // Assume length will fit in an int
392         } catch (final NumberFormatException ex) {
393             throw new IOException("Broken archive, unable to parse GNU string table length field as a number", ex);
394         }
395         namebuffer = IOUtils.readRange(in, bufflen);
396         final int read = namebuffer.length;
397         trackReadBytes(read);
398         if (read != bufflen) {
399             throw new IOException("Failed to read complete // record: expected=" + bufflen + " read=" + read);
400         }
401         return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
402     }
403 
404     private void trackReadBytes(final long read) {
405         count(read);
406         if (read > 0) {
407             offset += read;
408         }
409     }
410 }