/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream. Methods are provided to position at each successive entry in the archive, and to
 * then read each entry as a normal input stream using read().
 * <p>
 * There doesn't seem to be any hint about the encoding of string values in any piece of documentation. Given that the main purpose of dump/restore is backing
 * up a system, it seems very likely that the format uses the current default encoding of the system.
 * </p>
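 * <p>
 * A minimal usage sketch (illustrative only; {@code in} is an InputStream supplied by the caller and {@code out} is wherever the entry data should go):
 * </p>
 * <pre>{@code
 * try (DumpArchiveInputStream dump = new DumpArchiveInputStream(in)) {
 *     DumpArchiveEntry entry;
 *     while ((entry = dump.getNextEntry()) != null) {
 *         // the stream itself serves the current entry's data
 *         IOUtils.copy(dump, out);
 *     }
 * }
 * }</pre>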
 *
 * @NotThreadSafe
 * @since 1.3
 */
public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {

    private static final String CURRENT_PATH_SEGMENT = ".";
    private static final String PARENT_PATH_SEGMENT = "..";

    /**
     * Look at the first few bytes of the file to decide if it's a dump archive. With 32 bytes we can look at the magic value, with a full 1k we can verify the
     * checksum.
     *
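     * For example, a hedged sketch of probing a stream (assuming {@code in} is an InputStream positioned at the start of the data):
     *
     * <pre>{@code
     * byte[] buffer = new byte[DumpArchiveConstants.TP_SIZE]; // 1024 bytes, enough to verify the checksum
     * int n = IOUtils.readFully(in, buffer);
     * boolean isDump = DumpArchiveInputStream.matches(buffer, n);
     * }</pre>
     *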
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }

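    // Stream state: "active" is the most recently parsed segment header, readIdx
    // counts the records consumed within that segment, and readBuf/recordOffset
    // hold the current 1 KiB tape record and the read position within it.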
    private final DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;

    protected TapeInputStream raw;

    /** Map of ino -> dirent entry. We can use this to reconstruct full paths. */
    private final Map<Integer, Dirent> names = new HashMap<>();

    /** Map of ino -> (directory) entry when we're missing one or more elements in the path. */
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    /** Queue of (directory) entries where we now have the full path. */
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;

    /**
     * Constructor using the platform's default encoding for file names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructs a new instance.
     *
     * @param is       stream to read from
     * @param encoding the encoding to use for file names, use null for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding) throws ArchiveException {
        super(is, encoding);
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, CURRENT_PATH_SEGMENT);
        names.put(2, root);

        // use a priority queue to ensure that parent directories are
        // released first.
        queue = new PriorityQueue<>(10, (p, q) -> {
            if (p.getOriginalName() == null || q.getOriginalName() == null) {
                return Integer.MAX_VALUE;
            }

            return p.getOriginalName().compareTo(q.getOriginalName());
        });
    }

    /**
     * Closes this stream and the underlying raw stream.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Reads the next entry.
     *
     * @return the next entry
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for the prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++) && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for the prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE * (active.getHeaderCount() - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Gets the full path for the specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to resolve the path for
     * @return the full path for the specified archive entry, or null if there's a gap.
     * @throws DumpArchiveException if an infinite loop is detected in the directory entries.
     */
    private String getPath(final DumpArchiveEntry entry) throws DumpArchiveException {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value and if so we defer
        // this entry until the gap is filled (see below).
        final Stack<String> elements = new Stack<>();
        final BitSet visited = new BitSet();
        Dirent dirent = null;
        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }
            if (visited.get(i)) {
                throw new DumpArchiveException("Duplicate node " + i);
            }
            dirent = names.get(i);
            visited.set(i);
            elements.push(dirent.getName());
            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }
        // if an element is missing, defer the work and read the next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);
            return null;
        }
        // generate the full path from the stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());
        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }
        return sb.toString();
    }

    /**
     * Gets the archive summary information.
     *
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
     *
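     * A hedged sketch of a per-entry read loop (assuming {@code archive} is this stream, already positioned at an entry via {@code getNextEntry()}, and
     * {@code out} is any OutputStream supplied by the caller):
     *
     * <pre>{@code
     * byte[] buf = new byte[1024];
     * int n;
     * while ((n = archive.read(buf, 0, buf.length)) != -1) {
     *     out.write(buf, 0, n);
     * }
     * }</pre>
     *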
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */

    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load the next block if necessary.
            if (len > 0) {
                if (readIdx >= 512) {
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Reads the BITS segment.
     */
    private void readBITS() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Reads the CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Reads a directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry) throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first || DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) && DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

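            // Each dirent record parsed below is laid out as: a 32-bit inode
            // number, a 16-bit record length, an 8-bit file type, an 8-bit name
            // length, and finally the name itself.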
            for (int i = 0; i < datalen - 8 && i < size - 8; i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
                if (reclen == 0) {
                    throw new DumpArchiveException("reclen cannot be 0");
                }

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (CURRENT_PATH_SEGMENT.equals(name) || PARENT_PATH_SEGMENT.equals(name)) {
                    // skip "." and ".." entries.
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (final Map.Entry<Integer, DumpArchiveEntry> mapEntry : pending.entrySet()) {
                    final DumpArchiveEntry v = mapEntry.getValue();
                    final String path = getPath(v);
                    if (path != null) {
                        v.setName(path);
                        v.setSimpleName(names.get(mapEntry.getKey()).getName());
                        queue.add(v);
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                queue.forEach(e -> pending.remove(e.getIno()));
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

}