View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers.dump;
20  
21  import java.io.EOFException;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.util.Arrays;
25  import java.util.BitSet;
26  import java.util.HashMap;
27  import java.util.Map;
28  import java.util.PriorityQueue;
29  import java.util.Queue;
30  import java.util.Stack;
31  
32  import org.apache.commons.compress.archivers.ArchiveException;
33  import org.apache.commons.compress.archivers.ArchiveInputStream;
34  import org.apache.commons.compress.archivers.zip.ZipEncoding;
35  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
36  import org.apache.commons.compress.utils.IOUtils;
37  
38  /**
39   * The DumpArchiveInputStream reads a Unix dump archive as an InputStream. Methods are provided to position at each successive entry in the archive, and then
40   * read each entry as a normal input stream using read().
41   * <p>
42   * There doesn't seem to exist a hint on the encoding of string values in any piece of documentation. Given the main purpose of dump/restore is backing up a system
43   * it seems very likely the format uses the current default encoding of the system.
44   * </p>
45   * @NotThreadSafe
46   * @since 1.3
47   */
48  public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {
49  
    /** Directory entry name for the current directory ("."); skipped when building paths. */
    private static final String CURRENT_PATH_SEGMENT = ".";
    /** Directory entry name for the parent directory (".."); skipped when building paths. */
    private static final String PARENT_PATH_SEGMENT = "..";
52  
53      /**
54       * Look at the first few bytes of the file to decide if it's a dump archive. With 32 bytes we can look at the magic value, with a full 1k we can verify the
55       * checksum.
56       *
57       * @param buffer data to match
58       * @param length length of data
59       * @return whether the buffer seems to contain dump data
60       */
61      public static boolean matches(final byte[] buffer, final int length) {
62          // do we have enough of the header?
63          if (length < 32) {
64              return false;
65          }
66  
67          // this is the best test
68          if (length >= DumpArchiveConstants.TP_SIZE) {
69              return DumpArchiveUtil.verify(buffer);
70          }
71  
72          // this will work in a pinch.
73          return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
74      }
75  
    /** Summary information parsed from the archive's first header record. */
    private final DumpArchiveSummary summary;
    /** Segment header currently being processed. */
    private DumpArchiveEntry active;
    /** Set once {@link #close()} has run. */
    private boolean isClosed;
    /** Set once the END segment (end-of-archive marker) has been seen. */
    private boolean hasHitEOF;
    /** Size in bytes of the current entry's data (0 for directories). */
    private long entrySize;
    /** Number of bytes of the current entry already returned by read(). */
    private long entryOffset;
    /** Index of the next record within the current segment. */
    private int readIdx;
    /** Holds the most recently loaded data record. */
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    /** Scratch buffer for directory segment data; re-allocated when too small. */
    private byte[] blockBuffer;
    /** Read position inside readBuf; a value of readBuf.length means "exhausted, refill needed". */
    private int recordOffset;
    /** Offset of the current entry's header within the raw stream. */
    private long filepos;

    /**
     * TapeInputStream is the raw input.
     */
    protected TapeInputStream raw;

    /** Map of ino -> dirent entry. We can use this to reconstruct full paths. */
    private final Map<Integer, Dirent> names = new HashMap<>();

    /** Map of ino -> (directory) entry when we're missing one or more elements in the path. */
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    /** Queue of (directory) entries where we now have the full path. */
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;
106 
    /**
     * Constructor using the platform's default encoding for file names.
     * Delegates to {@link #DumpArchiveInputStream(InputStream, String)} with a null encoding.
     *
     * @param is stream to read from
     * @throws ArchiveException on error, e.g. if the stream is not a dump archive
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }
116 
117     /**
118      * Constructs a new instance.
119      *
120      * @param is       stream to read from
121      * @param encoding the encoding to use for file names, use null for the platform's default encoding
122      * @throws ArchiveException on error
123      * @since 1.6
124      */
125     public DumpArchiveInputStream(final InputStream is, final String encoding) throws ArchiveException {
126         super(is, encoding);
127         this.raw = new TapeInputStream(is);
128         this.hasHitEOF = false;
129         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
130 
131         try {
132             // read header, verify it's a dump archive.
133             final byte[] headerBytes = raw.readRecord();
134 
135             if (!DumpArchiveUtil.verify(headerBytes)) {
136                 throw new UnrecognizedFormatException();
137             }
138 
139             // get summary information
140             summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);
141 
142             // reset buffer with actual block size.
143             raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
144 
145             // allocate our read buffer.
146             blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];
147 
148             // skip past CLRI and BITS segments since we don't handle them yet.
149             readCLRI();
150             readBITS();
151         } catch (final IOException e) {
152             throw new ArchiveException(e.getMessage(), (Throwable) e);
153         }
154 
155         // put in a dummy record for the root node.
156         final Dirent root = new Dirent(2, 2, 4, CURRENT_PATH_SEGMENT);
157         names.put(2, root);
158 
159         // use priority based on queue to ensure parent directories are
160         // released first.
161         queue = new PriorityQueue<>(10, (p, q) -> {
162             if (p.getOriginalName() == null || q.getOriginalName() == null) {
163                 return Integer.MAX_VALUE;
164             }
165 
166             return p.getOriginalName().compareTo(q.getOriginalName());
167         });
168     }
169 
170     /**
171      * Closes the stream for this entry.
172      */
173     @Override
174     public void close() throws IOException {
175         if (!isClosed) {
176             isClosed = true;
177             raw.close();
178         }
179     }
180 
    /**
     * Gets the number of bytes read from the underlying raw tape stream.
     */
    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }
185 
    /**
     * Gets the number of bytes read, truncated to an int.
     *
     * @deprecated Use {@link #getBytesRead()}, which does not truncate the count.
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }
191 
    /**
     * Reads the next entry. Delegates to {@link #getNextEntry()}.
     *
     * @return the next entry
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }
203 
    /**
     * Reads the header of the next archive entry, resolving its full path from
     * the directory records read so far. Directory entries whose paths became
     * resolvable while reading earlier directory data are returned from the
     * queue first.
     *
     * @return the next entry, or null once the END segment has been reached
     * @throws IOException on error, including an invalid or truncated archive
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue? (entries with freshly resolved paths)
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                // sparse records occupy no space on tape, so only skip real ones.
                if (!active.isSparseRecord(readIdx++) && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            // remember where this entry's header starts for setOffset() below.
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                // holes contribute no data records, hence the subtraction.
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE * (active.getHeaderCount() - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                // consume the directory's data now so child paths can be
                // resolved later; the entry itself exposes no readable data.
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            // mark the record buffer as fully consumed so read() refills it.
            recordOffset = readBuf.length;

            path = getPath(entry);

            // a null path means an ancestor directory has not been read yet;
            // getPath() parked the entry in 'pending' — continue to the next header.
            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }
293 
294     /**
295      * Gets full path for specified archive entry, or null if there's a gap.
296      *
297      * @param entry
298      * @return full path for specified archive entry, or null if there's a gap.
299      * @throws DumpArchiveException Infinite loop detected in directory entries.
300      */
301     private String getPath(final DumpArchiveEntry entry) throws DumpArchiveException {
302         // build the stack of elements. It's possible that we're
303         // still missing an intermediate value and if so we
304         final Stack<String> elements = new Stack<>();
305         final BitSet visited = new BitSet();
306         Dirent dirent = null;
307         for (int i = entry.getIno();; i = dirent.getParentIno()) {
308             if (!names.containsKey(i)) {
309                 elements.clear();
310                 break;
311             }
312             if (visited.get(i)) {
313                 throw new DumpArchiveException("Duplicate node " + i);
314             }
315             dirent = names.get(i);
316             visited.set(i);
317             elements.push(dirent.getName());
318             if (dirent.getIno() == dirent.getParentIno()) {
319                 break;
320             }
321         }
322         // if an element is missing defer the work and read next entry.
323         if (elements.isEmpty()) {
324             pending.put(entry.getIno(), entry);
325             return null;
326         }
327         // generate full path from stack of elements.
328         final StringBuilder sb = new StringBuilder(elements.pop());
329         while (!elements.isEmpty()) {
330             sb.append('/');
331             sb.append(elements.pop());
332         }
333         return sb.toString();
334     }
335 
    /**
     * Gets the archive summary information.
     *
     * @return the summary record parsed from the archive's first header
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }
344 
    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current entry in the archive and will deal with them as if they were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        // this entry's data is exhausted (or the stream is unusable): report EOF.
        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        // clamp the request so we never read past the end of the entry.
        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            // copy at most what remains in the current record buffer.
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                if (readIdx >= 512) {
                    // current segment exhausted — a header describes at most
                    // 512 records (presumably the dump format's TP_NINDIR;
                    // TODO confirm) — so read the next segment header.
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // sparse (hole) record: the data is implicitly all zeros.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }
417 
418     /**
419      * Reads BITS segment.
420      */
421     private void readBITS() throws IOException {
422         final byte[] buffer = raw.readRecord();
423 
424         if (!DumpArchiveUtil.verify(buffer)) {
425             throw new InvalidFormatException();
426         }
427 
428         active = DumpArchiveEntry.parse(buffer);
429 
430         if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
431             throw new InvalidFormatException();
432         }
433 
434         // we don't do anything with this yet.
435         if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
436             throw new EOFException();
437         }
438         readIdx = active.getHeaderCount();
439     }
440 
441     /**
442      * Reads CLRI (deleted inode) segment.
443      */
444     private void readCLRI() throws IOException {
445         final byte[] buffer = raw.readRecord();
446 
447         if (!DumpArchiveUtil.verify(buffer)) {
448             throw new InvalidFormatException();
449         }
450 
451         active = DumpArchiveEntry.parse(buffer);
452 
453         if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
454             throw new InvalidFormatException();
455         }
456 
457         // we don't do anything with this yet.
458         if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount()) == -1) {
459             throw new EOFException();
460         }
461         readIdx = active.getHeaderCount();
462     }
463 
    /**
     * Reads a directory entry's data records and registers every directory
     * entry (dirent) found, so that full paths can be reconstructed. Also
     * resolves and queues any pending entries whose paths become complete.
     *
     * @param entry the directory entry whose segment header was just read
     * @throws IOException on error, including a truncated or corrupt archive
     */
    private void readDirectoryEntry(DumpArchiveEntry entry) throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        // the directory's data may span multiple segments; ADDR headers continue it.
        while (first || DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            // park directories whose own path cannot be resolved yet.
            if (!names.containsKey(entry.getIno()) && DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            // read this segment's data, growing blockBuffer on demand.
            if (blockBuffer.length < datalen) {
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            // walk the variable-length dirent records: 4-byte ino, 2-byte
            // record length, 1-byte type, 1-byte name length, then the name.
            for (int i = 0; i < datalen - 8 && i < size - 8; i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);
                if (reclen == 0) {
                    // a zero record length would loop forever on corrupt input.
                    throw new DumpArchiveException("reclen cannot be 0");
                }

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                // "." and ".." never contribute to reconstructed paths.
                if (CURRENT_PATH_SEGMENT.equals(name) || PARENT_PATH_SEGMENT.equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                 * if ((type == 4) && names.containsKey(ino)) { System.out.println("we already have ino: " + names.get(ino)); }
                 */

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (final Map.Entry<Integer, DumpArchiveEntry> mapEntry : pending.entrySet()) {
                    final DumpArchiveEntry v = mapEntry.getValue();
                    final String path = getPath(v);
                    if (path != null) {
                        v.setName(path);
                        v.setSimpleName(names.get(mapEntry.getKey()).getName());
                        queue.add(v);
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                queue.forEach(e -> pending.remove(e.getIno()));
            }

            // peek at the next header; an ADDR segment continues this
            // directory's data, anything else terminates the loop above.
            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }
545 
546 }