001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.ar;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.charset.StandardCharsets;
025import java.util.Arrays;
026import java.util.regex.Pattern;
027
028import org.apache.commons.compress.archivers.ArchiveInputStream;
029import org.apache.commons.compress.utils.ArchiveUtils;
030import org.apache.commons.compress.utils.IOUtils;
031import org.apache.commons.compress.utils.ParsingUtils;
032
033/**
034 * Implements the "ar" archive format as an input stream.
035 *
036 * @NotThreadSafe
037 */
038public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {
039
040    // offsets and length of meta data parts
041    private static final int NAME_OFFSET = 0;
042    private static final int NAME_LEN = 16;
043    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;
044
045    private static final int LAST_MODIFIED_LEN = 12;
046
047    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;
048
049    private static final int USER_ID_LEN = 6;
050
051    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
052    private static final int GROUP_ID_LEN = 6;
053    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
054    private static final int FILE_MODE_LEN = 8;
055    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
056    private static final int LENGTH_LEN = 10;
057    static final String BSD_LONGNAME_PREFIX = "#1/";
058    private static final int BSD_LONGNAME_PREFIX_LEN = BSD_LONGNAME_PREFIX.length();
059    private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
060    private static final String GNU_STRING_TABLE_NAME = "//";
061    private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");
062
063    /**
064     * Does the name look like it is a long name (or a name containing spaces) as encoded by BSD ar?
065     * <p>
066     * From the FreeBSD ar(5) man page:
067     * </p>
068     * <pre>
069     * BSD   In the BSD variant, names that are shorter than 16
070     *       characters and without embedded spaces are stored
071     *       directly in this field.  If a name has an embedded
072     *       space, or if it is longer than 16 characters, then
073     *       the string "#1/" followed by the decimal represen-
074     *       tation of the length of the file name is placed in
075     *       this field. The actual file name is stored immedi-
076     *       ately after the archive header.  The content of the
077     *       archive member follows the file name.  The ar_size
078     *       field of the header (see below) will then hold the
079     *       sum of the size of the file name and the size of
080     *       the member.
081     * </pre>
082     *
083     * @since 1.3
084     */
085    private static boolean isBSDLongName(final String name) {
086        return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches();
087    }
088
089    /**
090     * Is this the name of the "Archive String Table" as used by SVR4/GNU to store long file names?
091     * <p>
092     * GNU ar stores multiple extended file names in the data section of a file with the name "//", this record is referred to by future headers.
093     * </p>
094     * <p>
095     * A header references an extended file name by storing a "/" followed by a decimal offset to the start of the file name in the extended file name data
096     * section.
097     * </p>
098     * <p>
099     * The format of the "//" file itself is simply a list of the long file names, each separated by one or more LF characters. Note that the decimal offsets
100     * are number of characters, not line or string number within the "//" file.
101     * </p>
102     */
103    private static boolean isGNUStringTable(final String name) {
104        return GNU_STRING_TABLE_NAME.equals(name);
105    }
106
107    /**
108     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF control character
109     *
110     * @param signature the bytes to check
111     * @param length    the number of bytes to check
112     * @return true, if this stream is an Ar archive stream, false otherwise
113     */
114    public static boolean matches(final byte[] signature, final int length) {
115        // 3c21 7261 6863 0a3e
116        // @formatter:off
117        return length >= 8 &&
118                signature[0] == 0x21 &&
119                signature[1] == 0x3c &&
120                signature[2] == 0x61 &&
121                signature[3] == 0x72 &&
122                signature[4] == 0x63 &&
123                signature[5] == 0x68 &&
124                signature[6] == 0x3e &&
125                signature[7] == 0x0a;
126        // @formatter:on
127    }
128
129    private long offset;
130
131    private boolean closed;
132
133    /*
134     * If getNextEntry has been called, the entry metadata is stored in currentEntry.
135     */
136    private ArArchiveEntry currentEntry;
137
138    /** Storage area for extra long names (GNU ar). */
139    private byte[] namebuffer;
140
141    /**
142     * The offset where the current entry started. -1 if no entry has been called
143     */
144    private long entryOffset = -1;
145
146    /** Cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection). */
147    private final byte[] metaData = new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];
148
149    /**
150     * Constructs an Ar input stream with the referenced stream
151     *
152     * @param inputStream the ar input stream
153     */
154    public ArArchiveInputStream(final InputStream inputStream) {
155        super(inputStream, StandardCharsets.US_ASCII.name());
156    }
157
158    private int asInt(final byte[] byteArray, final int offset, final int len) throws IOException {
159        return asInt(byteArray, offset, len, 10, false);
160    }
161
162    private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) throws IOException {
163        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
164    }
165
166    private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException {
167        return asInt(byteArray, offset, len, base, false);
168    }
169
170    private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) throws IOException {
171        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
172        if (string.isEmpty() && treatBlankAsZero) {
173            return 0;
174        }
175        return ParsingUtils.parseIntValue(string, base);
176    }
177
178    private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException {
179        return ParsingUtils.parseLongValue(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
180    }
181
182    /*
183     * (non-Javadoc)
184     *
185     * @see java.io.InputStream#close()
186     */
187    @Override
188    public void close() throws IOException {
189        if (!closed) {
190            closed = true;
191            in.close();
192        }
193        currentEntry = null;
194    }
195
196    /**
197     * Reads the real name from the current stream assuming the very first bytes to be read are the real file name.
198     *
199     * @see #isBSDLongName
200     * @since 1.3
201     */
202    private String getBSDLongName(final String bsdLongName) throws IOException {
203        final int nameLen = ParsingUtils.parseIntValue(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
204        final byte[] name = IOUtils.readRange(in, nameLen);
205        final int read = name.length;
206        trackReadBytes(read);
207        if (read != nameLen) {
208            throw new EOFException(bsdLongName);
209        }
210        return ArchiveUtils.toAsciiString(name);
211    }
212
213    /**
214     * Gets an extended name from the GNU extended name buffer.
215     *
216     * @param offset pointer to entry within the buffer
217     * @return the extended file name; without trailing "/" if present.
218     * @throws IOException if name not found or buffer not set up
219     */
220    private String getExtendedName(final int offset) throws IOException {
221        if (namebuffer == null) {
222            throw new IOException("Cannot process GNU long file name as no // record was found");
223        }
224        for (int i = offset; i < namebuffer.length; i++) {
225            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
226                // Avoid array errors
227                if (i == 0) {
228                    break;
229                }
230                if (namebuffer[i - 1] == '/') {
231                    i--; // drop trailing /
232                }
233                // Check there is a something to return, otherwise break out of the loop
234                if (i - offset > 0) {
235                    return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset);
236                }
237                break;
238            }
239        }
240        throw new IOException("Failed to read entry: " + offset);
241    }
242
243    /**
244     * Returns the next AR entry in this stream.
245     *
246     * @return the next AR entry.
247     * @throws IOException if the entry could not be read
248     * @deprecated Use {@link #getNextEntry()}.
249     */
250    @Deprecated
251    public ArArchiveEntry getNextArEntry() throws IOException {
252        if (currentEntry != null) {
253            final long entryEnd = entryOffset + currentEntry.getLength();
254            final long skipped = org.apache.commons.io.IOUtils.skip(in, entryEnd - offset);
255            trackReadBytes(skipped);
256            currentEntry = null;
257        }
258        if (offset == 0) {
259            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
260            final byte[] realized = IOUtils.readRange(in, expected.length);
261            final int read = realized.length;
262            trackReadBytes(read);
263            if (read != expected.length) {
264                throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
265            }
266            if (!Arrays.equals(expected, realized)) {
267                throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
268            }
269        }
270        if (offset % 2 != 0) {
271            if (in.read() < 0) {
272                // hit eof
273                return null;
274            }
275            trackReadBytes(1);
276        }
277        {
278            final int read = IOUtils.readFully(in, metaData);
279            trackReadBytes(read);
280            if (read == 0) {
281                return null;
282            }
283            if (read < metaData.length) {
284                throw new IOException("Truncated ar archive");
285            }
286        }
287        {
288            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
289            final byte[] realized = IOUtils.readRange(in, expected.length);
290            final int read = realized.length;
291            trackReadBytes(read);
292            if (read != expected.length) {
293                throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
294            }
295            if (!Arrays.equals(expected, realized)) {
296                throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
297            }
298        }
299
300        entryOffset = offset;
301        // GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of an extended file name.
302        // entry name is stored as ASCII string
303        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
304        if (isGNUStringTable(temp)) { // GNU extended file names entry
305            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
306            return getNextArEntry();
307        }
308        long len;
309        try {
310            len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
311        } catch (final NumberFormatException ex) {
312            throw new IOException("Broken archive, unable to parse ar_size field as a number", ex);
313        }
314        if (temp.endsWith("/")) { // GNU terminator
315            temp = temp.substring(0, temp.length() - 1);
316        } else if (isGNULongName(temp)) {
317            final int off = ParsingUtils.parseIntValue(temp.substring(1)); // get the offset
318            temp = getExtendedName(off); // convert to the long name
319        } else if (isBSDLongName(temp)) {
320            temp = getBSDLongName(temp);
321            // entry length contained the length of the file name in
322            // addition to the real length of the entry.
323            // assume file name was ASCII, there is no "standard" otherwise
324            final int nameLen = temp.length();
325            len -= nameLen;
326            entryOffset += nameLen;
327        }
328        if (len < 0) {
329            throw new IOException("broken archive, entry with negative size");
330        }
331        try {
332            currentEntry = new ArArchiveEntry(temp, len, asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
333                    asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
334                    asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
335            return currentEntry;
336        } catch (final NumberFormatException ex) {
337            throw new IOException("Broken archive, unable to parse entry metadata fields as numbers", ex);
338        }
339    }
340
341    /*
342     * (non-Javadoc)
343     *
344     * @see org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
345     */
346    @Override
347    public ArArchiveEntry getNextEntry() throws IOException {
348        return getNextArEntry();
349    }
350
351    /**
352     * Does the name look like it is a long name (or a name containing spaces) as encoded by SVR4/GNU ar?
353     *
354     * @see #isGNUStringTable
355     */
356    private boolean isGNULongName(final String name) {
357        return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
358    }
359
360    /*
361     * (non-Javadoc)
362     *
363     * @see java.io.InputStream#read(byte[], int, int)
364     */
365    @Override
366    public int read(final byte[] b, final int off, final int len) throws IOException {
367        if (len == 0) {
368            return 0;
369        }
370        if (currentEntry == null) {
371            throw new IllegalStateException("No current ar entry");
372        }
373        final long entryEnd = entryOffset + currentEntry.getLength();
374        if (len < 0 || offset >= entryEnd) {
375            return -1;
376        }
377        final int toRead = (int) Math.min(len, entryEnd - offset);
378        final int ret = this.in.read(b, off, toRead);
379        trackReadBytes(ret);
380        return ret;
381    }
382
383    /**
384     * Reads the GNU archive String Table.
385     *
386     * @see #isGNUStringTable
387     */
388    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
389        final int bufflen;
390        try {
391            bufflen = asInt(length, offset, len); // Assume length will fit in an int
392        } catch (final NumberFormatException ex) {
393            throw new IOException("Broken archive, unable to parse GNU string table length field as a number", ex);
394        }
395        namebuffer = IOUtils.readRange(in, bufflen);
396        final int read = namebuffer.length;
397        trackReadBytes(read);
398        if (read != bufflen) {
399            throw new IOException("Failed to read complete // record: expected=" + bufflen + " read=" + read);
400        }
401        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
402    }
403
404    private void trackReadBytes(final long read) {
405        count(read);
406        if (read > 0) {
407            offset += read;
408        }
409    }
410}