/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The DumpArchiveInputStream reads a Unix dump archive as an InputStream. Methods are provided to position at each successive entry in the archive, and then
 * read each entry as a normal input stream using read().
 * <p>
 * There doesn't seem to be a hint about the encoding of string values in any piece of documentation. Given that the main purpose of dump/restore is backing up
 * a system, it seems very likely that the format uses the current default encoding of the system.
 * </p>
 *
 * @NotThreadSafe
 * @since 1.3
 */
public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {

    private static final String CURRENT_PATH_SEGMENT = ".";
    private static final String PARENT_PATH_SEGMENT = "..";

    /**
     * Looks at the first few bytes of the file to decide if it's a dump archive. With 32 bytes we can look at the magic value, with a full 1k we can verify
     * the checksum.
     *
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }
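    // A minimal sketch of how matches() might be used to probe a stream before committing to this
    // format. It assumes 'in' supports mark/reset; the local names are illustrative, not API:
    //
    //   final byte[] probe = new byte[DumpArchiveConstants.TP_SIZE];
    //   in.mark(probe.length);
    //   final int n = IOUtils.readFully(in, probe);
    //   in.reset();
    //   if (DumpArchiveInputStream.matches(probe, n)) {
    //       // it is reasonable to construct a DumpArchiveInputStream on 'in'
    //   }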
    private final DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;

    /**
     * TapeInputStream is the raw input.
     */
    protected TapeInputStream raw;

    /** Map of ino -> dirent entry. We can use this to reconstruct full paths. */
    private final Map<Integer, Dirent> names = new HashMap<>();

    /** Map of ino -> (directory) entry when we're missing one or more elements in the path. */
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    /** Queue of (directory) entries where we now have the full path. */
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Constructs a new instance using the platform's default encoding for file names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructs a new instance.
     *
     * @param is       stream to read from
     * @param encoding the encoding to use for file names, use null for the platform's default encoding
     * @throws ArchiveException on error
     * @since 1.6
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding) throws ArchiveException {
        super(is, encoding);
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException e) {
            throw new ArchiveException(e.getMessage(), (Throwable) e);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, CURRENT_PATH_SEGMENT);
        names.put(2, root);

        // use a priority queue to ensure parent directories are
        // released first.
        queue = new PriorityQueue<>(10, (p, q) -> {
            if (p.getOriginalName() == null || q.getOriginalName() == null) {
                return Integer.MAX_VALUE;
            }

            return p.getOriginalName().compareTo(q.getOriginalName());
        });
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Reads the next entry.
     *
     * @return the next entry
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }
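    // Typical usage of getNextEntry(), sketched as an illustration (the local names 'in' and
    // 'out' are assumptions, not part of this API):
    //
    //   try (DumpArchiveInputStream dump = new DumpArchiveInputStream(in)) {
    //       DumpArchiveEntry entry;
    //       while ((entry = dump.getNextEntry()) != null) {
    //           if (!entry.isDirectory()) {
    //               IOUtils.copy(dump, out); // copies only the current entry's bytes
    //           }
    //       }
    //   }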
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++) && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE * (active.getHeaderCount() - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Gets the full path for the specified archive entry, or null if there's a gap.
     *
     * @param entry the archive entry to resolve
     * @return the full path for the specified archive entry, or null if there's a gap.
     * @throws DumpArchiveException if an infinite loop is detected in the directory entries.
     */
    private String getPath(final DumpArchiveEntry entry) throws DumpArchiveException {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value; if so we clear the
        // stack and defer this entry until the gap is filled.
        final Stack<String> elements = new Stack<>();
        final BitSet visited = new BitSet();
        Dirent dirent = null;
        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }
            if (visited.get(i)) {
                throw new DumpArchiveException("Duplicate node " + i);
            }
            dirent = names.get(i);
            visited.set(i);
            elements.push(dirent.getName());
            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }
        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);
            return null;
        }
        // generate full path from stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());
        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }
        return sb.toString();
    }
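    // A worked illustration of the walk getPath() performs, using hypothetical inode numbers:
    // with names = { 2 -> "." (parent 2), 5 -> "home" (parent 2), 9 -> "user" (parent 5) },
    // resolving an entry with ino 9 pushes "user", "home", "." onto the stack and pops them
    // into the path "./home/user". If ino 5 were absent from 'names', the entry would instead
    // be parked in 'pending' and retried once more directory data has been read.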
    /**
     * Gets the archive summary information.
     *
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                if (readIdx >= 512) {
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Reads the BITS segment.
     */
    private void readBITS() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Reads the CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }
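    // Layout of each directory record decoded in readDirectoryEntry() below, as read via
    // DumpArchiveUtil.convert32/convert16 (this mirrors the BSD 'struct direct' dirent layout):
    //   bytes 0-3: inode number
    //   bytes 4-5: record length (reclen; must be non-zero)
    //   byte  6:   file type
    //   byte  7:   name length
    //   bytes 8+:  file name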
    /**
     * Reads a directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry) throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first || DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) && DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8; i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
                if (reclen == 0) {
                    throw new DumpArchiveException("reclen cannot be 0");
                }

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (CURRENT_PATH_SEGMENT.equals(name) || PARENT_PATH_SEGMENT.equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                 * if ((type == 4) && names.containsKey(ino)) { System.out.println("we already have ino: " + names.get(ino)); }
                 */

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (final Map.Entry<Integer, DumpArchiveEntry> mapEntry : pending.entrySet()) {
                    final DumpArchiveEntry v = mapEntry.getValue();
                    final String path = getPath(v);
                    if (path != null) {
                        v.setName(path);
                        v.setSimpleName(names.get(mapEntry.getKey()).getName());
                        queue.add(v);
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                queue.forEach(e -> pending.remove(e.getIno()));
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

}