View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.OutputStream;
25  import java.security.AccessController;
26  import java.security.PrivilegedAction;
27  import java.util.Collections;
28  import java.util.ServiceLoader;
29  import java.util.Set;
30  import java.util.SortedMap;
31  import java.util.TreeMap;
32  
33  import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
34  import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
35  import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
36  import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
37  import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
38  import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
39  import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
40  import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
41  import org.apache.commons.compress.archivers.sevenz.SevenZFile;
42  import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
43  import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
44  import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
45  import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
46  import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
47  import org.apache.commons.compress.utils.IOUtils;
48  import org.apache.commons.compress.utils.Sets;
49  import org.apache.commons.lang3.StringUtils;
50  
51  /**
 * Creates {@code Archive[In|Out]putStream}s from archiver names or from the first bytes of an InputStream. To add other implementations, extend
 * ArchiveStreamFactory and override the appropriate methods (calling the super implementation for everything you do not handle yourself).
54   *
55   * Compressing a ZIP-File:
56   *
57   * <pre>
58   * final OutputStream out = Files.newOutputStream(output.toPath());
59   * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
60   *
61   * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
62   * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
63   * os.closeArchiveEntry();
64   *
65   * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
66   * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
67   * os.closeArchiveEntry();
68   * os.close();
69   * </pre>
70   *
71   * Decompressing a ZIP-File:
72   *
73   * <pre>
74   * final InputStream is = Files.newInputStream(input.toPath());
75   * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
76   * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry();
77   * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
78   * IOUtils.copy(in, out);
79   * out.close();
80   * in.close();
81   * </pre>
82   *
83   * @Immutable provided that the deprecated method setEntryEncoding is not used.
84   * @ThreadSafe even if the deprecated method setEntryEncoding is used
85   */
86  public class ArchiveStreamFactory implements ArchiveStreamProvider {
87  
    /** Size in bytes of the buffer {@link #detect(InputStream)} reads when probing for a TAR header block. */
    private static final int TAR_HEADER_SIZE = 512;

    /** Upper bound used by {@link #detect(InputStream)} on the number of leading directory entries it steps over while probing for TAR. */
    private static final int TAR_TEST_ENTRY_COUNT = 10;

    /** Size in bytes of the buffer {@link #detect(InputStream)} reads when probing for a DUMP signature. */
    private static final int DUMP_SIGNATURE_SIZE = 32;

    /** Size in bytes of the buffer {@link #detect(InputStream)} reads for its first round of signature checks. */
    private static final int SIGNATURE_SIZE = 12;
95  
96      /**
97       * The singleton instance using the platform default encoding.
98       *
99       * @since 1.21
100      */
101     public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
102 
103     /**
104      * Constant (value {@value}) used to identify the APK archive format.
105      * <p>
106      * APK file extensions are .apk, .xapk, .apks, .apkm
107      * </p>
108      *
109      * @since 1.22
110      */
111     public static final String APK = "apk";
112 
113     /**
114      * Constant (value {@value}) used to identify the XAPK archive format.
115      * <p>
116      * APK file extensions are .apk, .xapk, .apks, .apkm
117      * </p>
118      *
119      * @since 1.22
120      */
121     public static final String XAPK = "xapk";
122 
123     /**
124      * Constant (value {@value}) used to identify the APKS archive format.
125      * <p>
126      * APK file extensions are .apk, .xapk, .apks, .apkm
127      * </p>
128      *
129      * @since 1.22
130      */
131     public static final String APKS = "apks";
132 
133     /**
134      * Constant (value {@value}) used to identify the APKM archive format.
135      * <p>
136      * APK file extensions are .apk, .xapk, .apks, .apkm
137      * </p>
138      *
139      * @since 1.22
140      */
141     public static final String APKM = "apkm";
142 
143     /**
144      * Constant (value {@value}) used to identify the AR archive format.
145      *
146      * @since 1.1
147      */
148     public static final String AR = "ar";
149 
150     /**
151      * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type.
152      *
153      * @since 1.6
154      */
155     public static final String ARJ = "arj";
156 
157     /**
158      * Constant (value {@value}) used to identify the CPIO archive format.
159      *
160      * @since 1.1
161      */
162     public static final String CPIO = "cpio";
163 
164     /**
165      * Constant (value {@value}) used to identify the Unix DUMP archive format. Not supported as an output stream type.
166      *
167      * @since 1.3
168      */
169     public static final String DUMP = "dump";
170 
171     /**
172      * Constant (value {@value}) used to identify the JAR archive format.
173      *
174      * @since 1.1
175      */
176     public static final String JAR = "jar";
177 
178     /**
     * Constant (value {@value}) used to identify the TAR archive format.
180      *
181      * @since 1.1
182      */
183     public static final String TAR = "tar";
184 
185     /**
186      * Constant (value {@value}) used to identify the ZIP archive format.
187      *
188      * @since 1.1
189      */
190     public static final String ZIP = "zip";
191 
192     /**
193      * Constant (value {@value}) used to identify the 7z archive format.
194      *
195      * @since 1.8
196      */
197     public static final String SEVEN_Z = "7z";
198 
199     private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
200         return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
201     }
202 
203     /**
204      * Try to determine the type of Archiver
205      *
206      * @param in input stream
207      * @return type of archiver if found
208      * @throws ArchiveException if an archiver cannot be detected in the stream
209      * @since 1.14
210      */
211     public static String detect(final InputStream in) throws ArchiveException {
212         if (in == null) {
213             throw new IllegalArgumentException("Stream must not be null.");
214         }
215         if (!in.markSupported()) {
216             throw new IllegalArgumentException("Mark is not supported.");
217         }
218         final byte[] signature = new byte[SIGNATURE_SIZE];
219         in.mark(signature.length);
220         int signatureLength = -1;
221         try {
222             signatureLength = IOUtils.readFully(in, signature);
223             in.reset();
224         } catch (final IOException e) {
225             throw new ArchiveException("Failure reading signature.", (Throwable) e);
226         }
227         // For now JAR files are detected as ZIP files.
228         if (ZipArchiveInputStream.matches(signature, signatureLength)) {
229             return ZIP;
230         }
231         // For now JAR files are detected as ZIP files.
232         if (JarArchiveInputStream.matches(signature, signatureLength)) {
233             return JAR;
234         }
235         if (ArArchiveInputStream.matches(signature, signatureLength)) {
236             return AR;
237         }
238         if (CpioArchiveInputStream.matches(signature, signatureLength)) {
239             return CPIO;
240         }
241         if (ArjArchiveInputStream.matches(signature, signatureLength)) {
242             return ARJ;
243         }
244         if (SevenZFile.matches(signature, signatureLength)) {
245             return SEVEN_Z;
246         }
247         // Dump needs a bigger buffer to check the signature;
248         final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
249         in.mark(dumpsig.length);
250         try {
251             signatureLength = IOUtils.readFully(in, dumpsig);
252             in.reset();
253         } catch (final IOException e) {
254             throw new ArchiveException("IOException while reading dump signature", (Throwable) e);
255         }
256         if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
257             return DUMP;
258         }
259         // Tar needs an even bigger buffer to check the signature; read the first block
260         final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
261         in.mark(tarHeader.length);
262         try {
263             signatureLength = IOUtils.readFully(in, tarHeader);
264             in.reset();
265         } catch (final IOException e) {
266             throw new ArchiveException("IOException while reading tar signature", (Throwable) e);
267         }
268         if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
269             return TAR;
270         }
271         // COMPRESS-117
272         if (signatureLength >= TAR_HEADER_SIZE) {
273             try (TarArchiveInputStream inputStream = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) {
274                 // COMPRESS-191 - verify the header checksum
275                 TarArchiveEntry entry = inputStream.getNextEntry();
276                 // try to find the first non-directory entry within the first 10 entries.
277                 int count = 0;
278                 while (entry != null && entry.isDirectory() && entry.isCheckSumOK() && count++ < TAR_TEST_ENTRY_COUNT) {
279                     entry = inputStream.getNextEntry();
280                 }
281                 if (entry != null && entry.isCheckSumOK() && !entry.isDirectory() && isName(entry.getGroupName()) && isName(entry.getName())
282                         && isName(entry.getUserName()) || count > 0) {
283                     return TAR;
284                 }
285             } catch (final Exception ignored) {
286                 // can generate IllegalArgumentException as well as IOException auto-detection, simply not a TAR ignored
287             }
288         }
289         throw new ArchiveException("No Archiver found for the stream signature");
290     }
291 
292     /**
293      * Constructs a new sorted map from input stream provider names to provider objects.
294      *
295      * <p>
296      * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
297      * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
298      * </p>
299      *
300      * <p>
301      * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
302      * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
303      * </p>
304      *
305      * <p>
306      * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
307      * </p>
308      *
309      * @return An immutable, map from names to provider objects
310      * @since 1.13
311      */
312     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
313         return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
314             final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
315             putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
316             archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
317             return map;
318         });
319     }
320 
321     /**
322      * Constructs a new sorted map from output stream provider names to provider objects.
323      *
324      * <p>
325      * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
326      * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
327      * </p>
328      *
329      * <p>
330      * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
331      * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
332      * </p>
333      *
334      * <p>
335      * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
336      * </p>
337      *
338      * @return An immutable, map from names to provider objects
339      * @since 1.13
340      */
341     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
342         return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
343             final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
344             putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
345             archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
346             return map;
347         });
348     }
349 
350     private static boolean isName(final String value) {
351         // Expect ASCII https://www.mkssoftware.com/docs/man4/tar.4.asp
352         return value.isEmpty() || value.chars().allMatch(ch -> ch > 31 && ch < 128);
353     }
354 
355     static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
356         names.forEach(name -> map.put(toKey(name), provider));
357     }
358 
359     private static String toKey(final String name) {
360         return StringUtils.toRootUpperCase(name);
361     }
362 
    /**
     * Entry encoding, null for the default.
     * <p>
     * The field is volatile; besides the constructor, the deprecated {@link #setEntryEncoding(String)} can reassign it.
     * </p>
     */
    private volatile String entryEncoding;

    /** Cached result of {@link #getArchiveInputStreamProviders()}; null until that method is first called. */
    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;

    /** Cached result of {@link #getArchiveOutputStreamProviders()}; null until that method is first called. */
    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
371 
    /**
     * Constructs an instance using the platform default encoding.
     * <p>
     * Delegates to {@link #ArchiveStreamFactory(String)} with a null entry encoding.
     * </p>
     */
    public ArchiveStreamFactory() {
        this(null);
    }
378 
    /**
     * Constructs an instance using the specified encoding.
     *
     * @param entryEncoding the encoding to be used, or null for the default; it is passed on by the two-argument
     *                      {@code createArchiveInputStream} and {@code createArchiveOutputStream} methods.
     * @since 1.10
     */
    public ArchiveStreamFactory(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
388 
389     /**
390      * Creates an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support
391      * marks, like BufferedInputStream.
392      *
393      * @param <I> The {@link ArchiveInputStream} type.
394      * @param in  the input stream
395      * @return the archive input stream
396      * @throws ArchiveException               if the archiver name is not known
397      * @throws StreamingNotSupportedException if the format cannot be read from a stream
398      * @throws IllegalArgumentException       if the stream is null or does not support mark
399      */
400     public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException {
401         return createArchiveInputStream(detect(in), in);
402     }
403 
404     /**
405      * Creates an archive input stream from an archiver name and an input stream.
406      *
407      * @param <I>          The {@link ArchiveInputStream} type.
408      * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or
409      *                     {@value #SEVEN_Z}
410      * @param in           the input stream
411      * @return the archive input stream
412      * @throws ArchiveException               if the archiver name is not known
413      * @throws StreamingNotSupportedException if the format cannot be read from a stream
414      * @throws IllegalArgumentException       if the archiver name or stream is null
415      */
416     public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in)
417             throws ArchiveException {
418         return createArchiveInputStream(archiverName, in, entryEncoding);
419     }
420 
421     @SuppressWarnings("unchecked")
422     @Override
423     public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in,
424             final String actualEncoding) throws ArchiveException {
425         if (archiverName == null) {
426             throw new IllegalArgumentException("Archiver name must not be null.");
427         }
428         if (in == null) {
429             throw new IllegalArgumentException("InputStream must not be null.");
430         }
431         if (AR.equalsIgnoreCase(archiverName)) {
432             return (I) new ArArchiveInputStream(in);
433         }
434         if (ARJ.equalsIgnoreCase(archiverName)) {
435             if (actualEncoding != null) {
436                 return (I) new ArjArchiveInputStream(in, actualEncoding);
437             }
438             return (I) new ArjArchiveInputStream(in);
439         }
440         if (ZIP.equalsIgnoreCase(archiverName)) {
441             if (actualEncoding != null) {
442                 return (I) new ZipArchiveInputStream(in, actualEncoding);
443             }
444             return (I) new ZipArchiveInputStream(in);
445         }
446         if (TAR.equalsIgnoreCase(archiverName)) {
447             if (actualEncoding != null) {
448                 return (I) new TarArchiveInputStream(in, actualEncoding);
449             }
450             return (I) new TarArchiveInputStream(in);
451         }
452         if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
453             if (actualEncoding != null) {
454                 return (I) new JarArchiveInputStream(in, actualEncoding);
455             }
456             return (I) new JarArchiveInputStream(in);
457         }
458         if (CPIO.equalsIgnoreCase(archiverName)) {
459             if (actualEncoding != null) {
460                 return (I) new CpioArchiveInputStream(in, actualEncoding);
461             }
462             return (I) new CpioArchiveInputStream(in);
463         }
464         if (DUMP.equalsIgnoreCase(archiverName)) {
465             if (actualEncoding != null) {
466                 return (I) new DumpArchiveInputStream(in, actualEncoding);
467             }
468             return (I) new DumpArchiveInputStream(in);
469         }
470         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
471             throw new StreamingNotSupportedException(SEVEN_Z);
472         }
473         final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
474         if (archiveStreamProvider != null) {
475             return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
476         }
477         throw new ArchiveException("Archiver: " + archiverName + " not found.");
478     }
479 
480     /**
481      * Creates an archive output stream from an archiver name and an output stream.
482      *
483      * @param <O>          The {@link ArchiveOutputStream} type.
484      * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
485      * @param out          the output stream
486      * @return the archive output stream
487      * @throws ArchiveException               if the archiver name is not known
488      * @throws StreamingNotSupportedException if the format cannot be written to a stream
489      * @throws IllegalArgumentException       if the archiver name or stream is null
490      */
491     public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out)
492             throws ArchiveException {
493         return createArchiveOutputStream(archiverName, out, entryEncoding);
494     }
495 
496     @SuppressWarnings("unchecked")
497     @Override
498     public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out,
499             final String actualEncoding) throws ArchiveException {
500         if (archiverName == null) {
501             throw new IllegalArgumentException("Archiver name must not be null.");
502         }
503         if (out == null) {
504             throw new IllegalArgumentException("OutputStream must not be null.");
505         }
506         if (AR.equalsIgnoreCase(archiverName)) {
507             return (O) new ArArchiveOutputStream(out);
508         }
509         if (ZIP.equalsIgnoreCase(archiverName)) {
510             final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
511             if (actualEncoding != null) {
512                 zip.setEncoding(actualEncoding);
513             }
514             return (O) zip;
515         }
516         if (TAR.equalsIgnoreCase(archiverName)) {
517             if (actualEncoding != null) {
518                 return (O) new TarArchiveOutputStream(out, actualEncoding);
519             }
520             return (O) new TarArchiveOutputStream(out);
521         }
522         if (JAR.equalsIgnoreCase(archiverName)) {
523             if (actualEncoding != null) {
524                 return (O) new JarArchiveOutputStream(out, actualEncoding);
525             }
526             return (O) new JarArchiveOutputStream(out);
527         }
528         if (CPIO.equalsIgnoreCase(archiverName)) {
529             if (actualEncoding != null) {
530                 return (O) new CpioArchiveOutputStream(out, actualEncoding);
531             }
532             return (O) new CpioArchiveOutputStream(out);
533         }
534         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
535             throw new StreamingNotSupportedException(SEVEN_Z);
536         }
537         final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
538         if (archiveStreamProvider != null) {
539             return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
540         }
541         throw new ArchiveException("Archiver: " + archiverName + " not found.");
542     }
543 
544     /**
545      * Gets an unmodifiable sorted map from input stream provider names to provider objects.
546      *
547      * @return an unmodifiable sorted map of from input stream provider names to provider objects.
548      * @see #findAvailableArchiveInputStreamProviders()
549      */
550     public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
551         if (archiveInputStreamProviders == null) {
552             archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
553         }
554         return archiveInputStreamProviders;
555     }
556 
557     /**
558      * Gets an unmodifiable sorted map from output stream provider names to provider objects.
559      *
560      * @return an unmodifiable sorted map of from input stream provider names to provider objects.
561      * @see #findAvailableArchiveInputStreamProviders()
562      */
563     public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
564         if (archiveOutputStreamProviders == null) {
565             archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
566         }
567         return archiveOutputStreamProviders;
568     }
569 
    /**
     * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default.
     * <p>
     * This is the value given to the constructor, or the one last set through the deprecated {@link #setEntryEncoding(String)}.
     * </p>
     *
     * @return entry encoding, or null for the archiver default
     * @since 1.5
     */
    public String getEntryEncoding() {
        return entryEncoding;
    }
579 
    /**
     * Gets the archive names this factory reports for input streams: {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR},
     * {@value #CPIO}, {@value #DUMP} and {@value #SEVEN_Z}.
     * <p>
     * {@value #SEVEN_Z} is listed although {@code createArchiveInputStream} rejects it with a {@link StreamingNotSupportedException}; {@value #APK} is
     * accepted by {@code createArchiveInputStream} but not listed here.
     * </p>
     *
     * @return the set of input stream archive names
     */
    @Override
    public Set<String> getInputStreamArchiveNames() {
        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
    }
584 
    /**
     * Gets the archive names this factory reports for output streams: {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO} and
     * {@value #SEVEN_Z}.
     * <p>
     * {@value #SEVEN_Z} is listed although {@code createArchiveOutputStream} rejects it with a {@link StreamingNotSupportedException}.
     * </p>
     *
     * @return the set of output stream archive names
     */
    @Override
    public Set<String> getOutputStreamArchiveNames() {
        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
    }
589 
    /**
     * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default.
     * <p>
     * The new value is used by later calls to the two-argument {@code createArchiveInputStream} and {@code createArchiveOutputStream} methods.
     * </p>
     *
     * @param entryEncoding the entry encoding, null uses the archiver default.
     * @since 1.5
     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
     */
    @Deprecated
    public void setEntryEncoding(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
601 
602 }