DumpArchiveInputStream.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package org.apache.commons.compress.archivers.dump;
- import java.io.EOFException;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.Arrays;
- import java.util.BitSet;
- import java.util.HashMap;
- import java.util.Map;
- import java.util.PriorityQueue;
- import java.util.Queue;
- import java.util.Stack;
- import org.apache.commons.compress.archivers.ArchiveException;
- import org.apache.commons.compress.archivers.ArchiveInputStream;
- import org.apache.commons.compress.archivers.zip.ZipEncoding;
- import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
- import org.apache.commons.compress.utils.IOUtils;
- /**
- * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream. Methods are provided to position at each successive entry in the archive, and the
- * read each entry as a normal input stream using read().
- * <p>
- * There doesn't seem to exist a hint on the encoding of string values in any piece documentation. Given the main purpose of dump/restore is backing up a system
- * it seems very likely the format uses the current default encoding of the system.
- * </p>
- * @NotThreadSafe
- * @since 1.3
- */
- public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {
- private static final String CURRENT_PATH_SEGMENT = ".";
- private static final String PARENT_PATH_SEGMENT = "..";
- /**
- * Look at the first few bytes of the file to decide if it's a dump archive. With 32 bytes we can look at the magic value, with a full 1k we can verify the
- * checksum.
- *
- * @param buffer data to match
- * @param length length of data
- * @return whether the buffer seems to contain dump data
- */
- public static boolean matches(final byte[] buffer, final int length) {
- // do we have enough of the header?
- if (length < 32) {
- return false;
- }
- // this is the best test
- if (length >= DumpArchiveConstants.TP_SIZE) {
- return DumpArchiveUtil.verify(buffer);
- }
- // this will work in a pinch.
- return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
- }
- private final DumpArchiveSummary summary;
- private DumpArchiveEntry active;
- private boolean isClosed;
- private boolean hasHitEOF;
- private long entrySize;
- private long entryOffset;
- private int readIdx;
- private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
- private byte[] blockBuffer;
- private int recordOffset;
- private long filepos;
- protected TapeInputStream raw;
- /** Map of ino -> dirent entry. We can use this to reconstruct full paths. */
- private final Map<Integer, Dirent> names = new HashMap<>();
- /** Map of ino -> (directory) entry when we're missing one or more elements in the path. */
- private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();
- /** Queue of (directory) entries where we now have the full path. */
- private final Queue<DumpArchiveEntry> queue;
- /**
- * The encoding to use for file names and labels.
- */
- private final ZipEncoding zipEncoding;
- /**
- * Constructor using the platform's default encoding for file names.
- *
- * @param is stream to read from
- * @throws ArchiveException on error
- */
- public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
- this(is, null);
- }
- /**
- * Constructs a new instance.
- *
- * @param is stream to read from
- * @param encoding the encoding to use for file names, use null for the platform's default encoding
- * @since 1.6
- * @throws ArchiveException on error
- */
- public DumpArchiveInputStream(final InputStream is, final String encoding) throws ArchiveException {
- super(is, encoding);
- this.raw = new TapeInputStream(is);
- this.hasHitEOF = false;
- this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
- try {
- // read header, verify it's a dump archive.
- final byte[] headerBytes = raw.readRecord();
- if (!DumpArchiveUtil.verify(headerBytes)) {
- throw new UnrecognizedFormatException();
- }
- // get summary information
- summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);
- // reset buffer with actual block size.
- raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
- // allocate our read buffer.
- blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];
- // skip past CLRI and BITS segments since we don't handle them yet.
- readCLRI();
- readBITS();
- } catch (final IOException ex) {
- throw new ArchiveException(ex.getMessage(), ex);
- }
- // put in a dummy record for the root node.
- final Dirent root = new Dirent(2, 2, 4, CURRENT_PATH_SEGMENT);
- names.put(2, root);
- // use priority based on queue to ensure parent directories are
- // released first.
- queue = new PriorityQueue<>(10, (p, q) -> {
- if (p.getOriginalName() == null || q.getOriginalName() == null) {
- return Integer.MAX_VALUE;
- }
- return p.getOriginalName().compareTo(q.getOriginalName());
- });
- }
- /**
- * Closes the stream for this entry.
- */
- @Override
- public void close() throws IOException {
- if (!isClosed) {
- isClosed = true;
- raw.close();
- }
- }
- @Override
- public long getBytesRead() {
- return raw.getBytesRead();
- }
- @Deprecated
- @Override
- public int getCount() {
- return (int) getBytesRead();
- }
- /**
- * Reads the next entry.
- *
- * @return the next entry
- * @throws IOException on error
- * @deprecated Use {@link #getNextEntry()}.
- */
- @Deprecated
- public DumpArchiveEntry getNextDumpEntry() throws IOException {
- return getNextEntry();
- }
- @Override
- public DumpArchiveEntry getNextEntry() throws IOException {
- DumpArchiveEntry entry = null;
- String path = null;
- // is there anything in the queue?
- if (!queue.isEmpty()) {
- return queue.remove();
- }
- while (entry == null) {
- if (hasHitEOF) {
- return null;
- }
- // skip any remaining records in this segment for prior file.
- // we might still have holes... easiest to do it
- // block by block. We may want to revisit this if
- // the unnecessary decompression time adds up.
- while (readIdx < active.getHeaderCount()) {
- if (!active.isSparseRecord(readIdx++) && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
- throw new EOFException();
- }
- }
- readIdx = 0;
- filepos = raw.getBytesRead();
- byte[] headerBytes = raw.readRecord();
- if (!DumpArchiveUtil.verify(headerBytes)) {
- throw new InvalidFormatException();
- }
- active = DumpArchiveEntry.parse(headerBytes);
- // skip any remaining segments for prior file.
- while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
- if (raw.skip((long) DumpArchiveConstants.TP_SIZE * (active.getHeaderCount() - active.getHeaderHoles())) == -1) {
- throw new EOFException();
- }
- filepos = raw.getBytesRead();
- headerBytes = raw.readRecord();
- if (!DumpArchiveUtil.verify(headerBytes)) {
- throw new InvalidFormatException();
- }
- active = DumpArchiveEntry.parse(headerBytes);
- }
- // check if this is an end-of-volume marker.
- if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
- hasHitEOF = true;
- return null;
- }
- entry = active;
- if (entry.isDirectory()) {
- readDirectoryEntry(active);
- // now we create an empty InputStream.
- entryOffset = 0;
- entrySize = 0;
- readIdx = active.getHeaderCount();
- } else {
- entryOffset = 0;
- entrySize = active.getEntrySize();
- readIdx = 0;
- }
- recordOffset = readBuf.length;
- path = getPath(entry);
- if (path == null) {
- entry = null;
- }
- }
- entry.setName(path);
- entry.setSimpleName(names.get(entry.getIno()).getName());
- entry.setOffset(filepos);
- return entry;
- }
- /**
- * Gets full path for specified archive entry, or null if there's a gap.
- *
- * @param entry
- * @return full path for specified archive entry, or null if there's a gap.
- * @throws DumpArchiveException Infinite loop detected in directory entries.
- */
- private String getPath(final DumpArchiveEntry entry) throws DumpArchiveException {
- // build the stack of elements. It's possible that we're
- // still missing an intermediate value and if so we
- final Stack<String> elements = new Stack<>();
- final BitSet visited = new BitSet();
- Dirent dirent = null;
- for (int i = entry.getIno();; i = dirent.getParentIno()) {
- if (!names.containsKey(i)) {
- elements.clear();
- break;
- }
- if (visited.get(i)) {
- throw new DumpArchiveException("Duplicate node " + i);
- }
- dirent = names.get(i);
- visited.set(i);
- elements.push(dirent.getName());
- if (dirent.getIno() == dirent.getParentIno()) {
- break;
- }
- }
- // if an element is missing defer the work and read next entry.
- if (elements.isEmpty()) {
- pending.put(entry.getIno(), entry);
- return null;
- }
- // generate full path from stack of elements.
- final StringBuilder sb = new StringBuilder(elements.pop());
- while (!elements.isEmpty()) {
- sb.append('/');
- sb.append(elements.pop());
- }
- return sb.toString();
- }
- /**
- * Gets the archive summary information.
- *
- * @return the summary
- */
- public DumpArchiveSummary getSummary() {
- return summary;
- }
- /**
- * Reads bytes from the current dump archive entry.
- *
- * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
- *
- * @param buf The buffer into which to place bytes read.
- * @param off The offset at which to place bytes read.
- * @param len The number of bytes to read.
- * @return The number of bytes read, or -1 at EOF.
- * @throws IOException on error
- */
- @Override
- public int read(final byte[] buf, int off, int len) throws IOException {
- if (len == 0) {
- return 0;
- }
- int totalRead = 0;
- if (hasHitEOF || isClosed || entryOffset >= entrySize) {
- return -1;
- }
- if (active == null) {
- throw new IllegalStateException("No current dump entry");
- }
- if (len + entryOffset > entrySize) {
- len = (int) (entrySize - entryOffset);
- }
- while (len > 0) {
- final int sz = Math.min(len, readBuf.length - recordOffset);
- // copy any data we have
- if (recordOffset + sz <= readBuf.length) {
- System.arraycopy(readBuf, recordOffset, buf, off, sz);
- totalRead += sz;
- recordOffset += sz;
- len -= sz;
- off += sz;
- }
- // load next block if necessary.
- if (len > 0) {
- if (readIdx >= 512) {
- final byte[] headerBytes = raw.readRecord();
- if (!DumpArchiveUtil.verify(headerBytes)) {
- throw new InvalidFormatException();
- }
- active = DumpArchiveEntry.parse(headerBytes);
- readIdx = 0;
- }
- if (!active.isSparseRecord(readIdx++)) {
- final int r = raw.read(readBuf, 0, readBuf.length);
- if (r != readBuf.length) {
- throw new EOFException();
- }
- } else {
- Arrays.fill(readBuf, (byte) 0);
- }
- recordOffset = 0;
- }
- }
- entryOffset += totalRead;
- return totalRead;
- }
- /**
- * Read BITS segment.
- */
- private void readBITS() throws IOException {
- final byte[] buffer = raw.readRecord();
- if (!DumpArchiveUtil.verify(buffer)) {
- throw new InvalidFormatException();
- }
- active = DumpArchiveEntry.parse(buffer);
- if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
- throw new InvalidFormatException();
- }
- // we don't do anything with this yet.
- if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
- throw new EOFException();
- }
- readIdx = active.getHeaderCount();
- }
- /**
- * Read CLRI (deleted inode) segment.
- */
- private void readCLRI() throws IOException {
- final byte[] buffer = raw.readRecord();
- if (!DumpArchiveUtil.verify(buffer)) {
- throw new InvalidFormatException();
- }
- active = DumpArchiveEntry.parse(buffer);
- if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
- throw new InvalidFormatException();
- }
- // we don't do anything with this yet.
- if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
- throw new EOFException();
- }
- readIdx = active.getHeaderCount();
- }
- /**
- * Read directory entry.
- */
- private void readDirectoryEntry(DumpArchiveEntry entry) throws IOException {
- long size = entry.getEntrySize();
- boolean first = true;
- while (first || DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
- // read the header that we just peeked at.
- if (!first) {
- raw.readRecord();
- }
- if (!names.containsKey(entry.getIno()) && DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
- pending.put(entry.getIno(), entry);
- }
- final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();
- if (blockBuffer.length < datalen) {
- blockBuffer = IOUtils.readRange(raw, datalen);
- if (blockBuffer.length != datalen) {
- throw new EOFException();
- }
- } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
- throw new EOFException();
- }
- int reclen = 0;
- for (int i = 0; i < datalen - 8 && i < size - 8; i += reclen) {
- final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
- reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
- if (reclen == 0) {
- throw new DumpArchiveException("reclen cannot be 0");
- }
- final byte type = blockBuffer[i + 6];
- final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);
- if (CURRENT_PATH_SEGMENT.equals(name) || PARENT_PATH_SEGMENT.equals(name)) {
- // do nothing...
- continue;
- }
- final Dirent d = new Dirent(ino, entry.getIno(), type, name);
- /*
- * if ((type == 4) && names.containsKey(ino)) { System.out.println("we already have ino: " + names.get(ino)); }
- */
- names.put(ino, d);
- // check whether this allows us to fill anything in the pending list.
- for (final Map.Entry<Integer, DumpArchiveEntry> mapEntry : pending.entrySet()) {
- final DumpArchiveEntry v = mapEntry.getValue();
- final String path = getPath(v);
- if (path != null) {
- v.setName(path);
- v.setSimpleName(names.get(mapEntry.getKey()).getName());
- queue.add(v);
- }
- }
- // remove anything that we found. (We can't do it earlier
- // because of concurrent modification exceptions.)
- queue.forEach(e -> pending.remove(e.getIno()));
- }
- final byte[] peekBytes = raw.peek();
- if (!DumpArchiveUtil.verify(peekBytes)) {
- throw new InvalidFormatException();
- }
- entry = DumpArchiveEntry.parse(peekBytes);
- first = false;
- size -= DumpArchiveConstants.TP_SIZE;
- }
- }
- }