001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.ar; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.charset.StandardCharsets; 025import java.util.Arrays; 026import java.util.regex.Pattern; 027 028import org.apache.commons.compress.archivers.ArchiveInputStream; 029import org.apache.commons.compress.utils.ArchiveUtils; 030import org.apache.commons.compress.utils.IOUtils; 031import org.apache.commons.compress.utils.ParsingUtils; 032 033/** 034 * Implements the "ar" archive format as an input stream. 035 * 036 * @NotThreadSafe 037 */ 038public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> { 039 040 // offsets and length of meta data parts 041 private static final int NAME_OFFSET = 0; 042 private static final int NAME_LEN = 16; 043 private static final int LAST_MODIFIED_OFFSET = NAME_LEN; 044 045 private static final int LAST_MODIFIED_LEN = 12; 046 047 private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN; 048 049 private static final int USER_ID_LEN = 6; 050 051 private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN; 052 private static final int GROUP_ID_LEN = 6; 053 private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN; 054 private static final int FILE_MODE_LEN = 8; 055 private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN; 056 private static final int LENGTH_LEN = 10; 057 static final String BSD_LONGNAME_PREFIX = "#1/"; 058 private static final int BSD_LONGNAME_PREFIX_LEN = BSD_LONGNAME_PREFIX.length(); 059 private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+"); 060 private static final String GNU_STRING_TABLE_NAME = "//"; 061 private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+"); 062 063 /** 064 * Does the name look like it is a long name (or a name containing spaces) as encoded by BSD ar? 065 * <p> 066 * From the FreeBSD ar(5) man page: 067 * </p> 068 * <pre> 069 * BSD In the BSD variant, names that are shorter than 16 070 * characters and without embedded spaces are stored 071 * directly in this field. If a name has an embedded 072 * space, or if it is longer than 16 characters, then 073 * the string "#1/" followed by the decimal represen- 074 * tation of the length of the file name is placed in 075 * this field. The actual file name is stored immedi- 076 * ately after the archive header. The content of the 077 * archive member follows the file name. The ar_size 078 * field of the header (see below) will then hold the 079 * sum of the size of the file name and the size of 080 * the member. 081 * </pre> 082 * 083 * @since 1.3 084 */ 085 private static boolean isBSDLongName(final String name) { 086 return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches(); 087 } 088 089 /** 090 * Is this the name of the "Archive String Table" as used by SVR4/GNU to store long file names? 091 * <p> 092 * GNU ar stores multiple extended file names in the data section of a file with the name "//", this record is referred to by future headers. 093 * </p> 094 * <p> 095 * A header references an extended file name by storing a "/" followed by a decimal offset to the start of the file name in the extended file name data 096 * section. 097 * </p> 098 * <p> 099 * The format of the "//" file itself is simply a list of the long file names, each separated by one or more LF characters. Note that the decimal offsets 100 * are number of characters, not line or string number within the "//" file. 101 * </p> 102 */ 103 private static boolean isGNUStringTable(final String name) { 104 return GNU_STRING_TABLE_NAME.equals(name); 105 } 106 107 /** 108 * Checks if the signature matches ASCII "!<arch>" followed by a single LF control character 109 * 110 * @param signature the bytes to check 111 * @param length the number of bytes to check 112 * @return true, if this stream is an Ar archive stream, false otherwise 113 */ 114 public static boolean matches(final byte[] signature, final int length) { 115 // 3c21 7261 6863 0a3e 116 // @formatter:off 117 return length >= 8 && 118 signature[0] == 0x21 && 119 signature[1] == 0x3c && 120 signature[2] == 0x61 && 121 signature[3] == 0x72 && 122 signature[4] == 0x63 && 123 signature[5] == 0x68 && 124 signature[6] == 0x3e && 125 signature[7] == 0x0a; 126 // @formatter:on 127 } 128 129 private long offset; 130 131 private boolean closed; 132 133 /* 134 * If getNextEntry has been called, the entry metadata is stored in currentEntry. 135 */ 136 private ArArchiveEntry currentEntry; 137 138 /** Storage area for extra long names (GNU ar). */ 139 private byte[] namebuffer; 140 141 /** 142 * The offset where the current entry started. -1 if no entry has been called 143 */ 144 private long entryOffset = -1; 145 146 /** Cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection). */ 147 private final byte[] metaData = new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN]; 148 149 /** 150 * Constructs an Ar input stream with the referenced stream 151 * 152 * @param inputStream the ar input stream 153 */ 154 public ArArchiveInputStream(final InputStream inputStream) { 155 super(inputStream, StandardCharsets.US_ASCII.name()); 156 } 157 158 private int asInt(final byte[] byteArray, final int offset, final int len) throws IOException { 159 return asInt(byteArray, offset, len, 10, false); 160 } 161 162 private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero) throws IOException { 163 return asInt(byteArray, offset, len, 10, treatBlankAsZero); 164 } 165 166 private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException { 167 return asInt(byteArray, offset, len, base, false); 168 } 169 170 private int asInt(final byte[] byteArray, final int offset, final int len, final int base, final boolean treatBlankAsZero) throws IOException { 171 final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim(); 172 if (string.isEmpty() && treatBlankAsZero) { 173 return 0; 174 } 175 return ParsingUtils.parseIntValue(string, base); 176 } 177 178 private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException { 179 return ParsingUtils.parseLongValue(ArchiveUtils.toAsciiString(byteArray, offset, len).trim()); 180 } 181 182 /* 183 * (non-Javadoc) 184 * 185 * @see java.io.InputStream#close() 186 */ 187 @Override 188 public void close() throws IOException { 189 if (!closed) { 190 closed = true; 191 in.close(); 192 } 193 currentEntry = null; 194 } 195 196 /** 197 * Reads the real name from the current stream assuming the very first bytes to be read are the real file name. 198 * 199 * @see #isBSDLongName 200 * @since 1.3 201 */ 202 private String getBSDLongName(final String bsdLongName) throws IOException { 203 final int nameLen = ParsingUtils.parseIntValue(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN)); 204 final byte[] name = IOUtils.readRange(in, nameLen); 205 final int read = name.length; 206 trackReadBytes(read); 207 if (read != nameLen) { 208 throw new EOFException(bsdLongName); 209 } 210 return ArchiveUtils.toAsciiString(name); 211 } 212 213 /** 214 * Gets an extended name from the GNU extended name buffer. 215 * 216 * @param offset pointer to entry within the buffer 217 * @return the extended file name; without trailing "/" if present. 218 * @throws IOException if name not found or buffer not set up 219 */ 220 private String getExtendedName(final int offset) throws IOException { 221 if (namebuffer == null) { 222 throw new IOException("Cannot process GNU long file name as no // record was found"); 223 } 224 for (int i = offset; i < namebuffer.length; i++) { 225 if (namebuffer[i] == '\012' || namebuffer[i] == 0) { 226 // Avoid array errors 227 if (i == 0) { 228 break; 229 } 230 if (namebuffer[i - 1] == '/') { 231 i--; // drop trailing / 232 } 233 // Check there is a something to return, otherwise break out of the loop 234 if (i - offset > 0) { 235 return ArchiveUtils.toAsciiString(namebuffer, offset, i - offset); 236 } 237 break; 238 } 239 } 240 throw new IOException("Failed to read entry: " + offset); 241 } 242 243 /** 244 * Returns the next AR entry in this stream. 245 * 246 * @return the next AR entry. 247 * @throws IOException if the entry could not be read 248 * @deprecated Use {@link #getNextEntry()}. 249 */ 250 @Deprecated 251 public ArArchiveEntry getNextArEntry() throws IOException { 252 if (currentEntry != null) { 253 final long entryEnd = entryOffset + currentEntry.getLength(); 254 final long skipped = org.apache.commons.io.IOUtils.skip(in, entryEnd - offset); 255 trackReadBytes(skipped); 256 currentEntry = null; 257 } 258 if (offset == 0) { 259 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER); 260 final byte[] realized = IOUtils.readRange(in, expected.length); 261 final int read = realized.length; 262 trackReadBytes(read); 263 if (read != expected.length) { 264 throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead()); 265 } 266 if (!Arrays.equals(expected, realized)) { 267 throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized)); 268 } 269 } 270 if (offset % 2 != 0) { 271 if (in.read() < 0) { 272 // hit eof 273 return null; 274 } 275 trackReadBytes(1); 276 } 277 { 278 final int read = IOUtils.readFully(in, metaData); 279 trackReadBytes(read); 280 if (read == 0) { 281 return null; 282 } 283 if (read < metaData.length) { 284 throw new IOException("Truncated ar archive"); 285 } 286 } 287 { 288 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER); 289 final byte[] realized = IOUtils.readRange(in, expected.length); 290 final int read = realized.length; 291 trackReadBytes(read); 292 if (read != expected.length) { 293 throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead()); 294 } 295 if (!Arrays.equals(expected, realized)) { 296 throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead()); 297 } 298 } 299 300 entryOffset = offset; 301 // GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of an extended file name. 302 // entry name is stored as ASCII string 303 String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim(); 304 if (isGNUStringTable(temp)) { // GNU extended file names entry 305 currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN); 306 return getNextArEntry(); 307 } 308 long len; 309 try { 310 len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN); 311 } catch (final NumberFormatException ex) { 312 throw new IOException("Broken archive, unable to parse ar_size field as a number", ex); 313 } 314 if (temp.endsWith("/")) { // GNU terminator 315 temp = temp.substring(0, temp.length() - 1); 316 } else if (isGNULongName(temp)) { 317 final int off = ParsingUtils.parseIntValue(temp.substring(1)); // get the offset 318 temp = getExtendedName(off); // convert to the long name 319 } else if (isBSDLongName(temp)) { 320 temp = getBSDLongName(temp); 321 // entry length contained the length of the file name in 322 // addition to the real length of the entry. 323 // assume file name was ASCII, there is no "standard" otherwise 324 final int nameLen = temp.length(); 325 len -= nameLen; 326 entryOffset += nameLen; 327 } 328 if (len < 0) { 329 throw new IOException("broken archive, entry with negative size"); 330 } 331 try { 332 currentEntry = new ArArchiveEntry(temp, len, asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true), 333 asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true), asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8), 334 asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN)); 335 return currentEntry; 336 } catch (final NumberFormatException ex) { 337 throw new IOException("Broken archive, unable to parse entry metadata fields as numbers", ex); 338 } 339 } 340 341 /* 342 * (non-Javadoc) 343 * 344 * @see org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry() 345 */ 346 @Override 347 public ArArchiveEntry getNextEntry() throws IOException { 348 return getNextArEntry(); 349 } 350 351 /** 352 * Does the name look like it is a long name (or a name containing spaces) as encoded by SVR4/GNU ar? 353 * 354 * @see #isGNUStringTable 355 */ 356 private boolean isGNULongName(final String name) { 357 return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches(); 358 } 359 360 /* 361 * (non-Javadoc) 362 * 363 * @see java.io.InputStream#read(byte[], int, int) 364 */ 365 @Override 366 public int read(final byte[] b, final int off, final int len) throws IOException { 367 if (len == 0) { 368 return 0; 369 } 370 if (currentEntry == null) { 371 throw new IllegalStateException("No current ar entry"); 372 } 373 final long entryEnd = entryOffset + currentEntry.getLength(); 374 if (len < 0 || offset >= entryEnd) { 375 return -1; 376 } 377 final int toRead = (int) Math.min(len, entryEnd - offset); 378 final int ret = this.in.read(b, off, toRead); 379 trackReadBytes(ret); 380 return ret; 381 } 382 383 /** 384 * Reads the GNU archive String Table. 385 * 386 * @see #isGNUStringTable 387 */ 388 private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException { 389 final int bufflen; 390 try { 391 bufflen = asInt(length, offset, len); // Assume length will fit in an int 392 } catch (final NumberFormatException ex) { 393 throw new IOException("Broken archive, unable to parse GNU string table length field as a number", ex); 394 } 395 namebuffer = IOUtils.readRange(in, bufflen); 396 final int read = namebuffer.length; 397 trackReadBytes(read); 398 if (read != bufflen) { 399 throw new IOException("Failed to read complete // record: expected=" + bufflen + " read=" + read); 400 } 401 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen); 402 } 403 404 private void trackReadBytes(final long read) { 405 count(read); 406 if (read > 0) { 407 offset += read; 408 } 409 } 410}