001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.Collections;
028import java.util.ServiceLoader;
029import java.util.Set;
030import java.util.SortedMap;
031import java.util.TreeMap;
032
033import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
034import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
035import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
036import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
037import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
038import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
039import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
040import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
041import org.apache.commons.compress.archivers.sevenz.SevenZFile;
042import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
043import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
044import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
045import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
047import org.apache.commons.compress.utils.IOUtils;
048import org.apache.commons.compress.utils.Sets;
049import org.apache.commons.lang3.StringUtils;
050
051/**
 * Creates {@code ArchiveInputStream}s and {@code ArchiveOutputStream}s from archiver names or from the first bytes of an InputStream. To add other
 * implementations, extend ArchiveStreamFactory and override the appropriate methods (calling the super implementation, of course).
054 *
055 * Compressing a ZIP-File:
056 *
057 * <pre>
058 * final OutputStream out = Files.newOutputStream(output.toPath());
059 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
060 *
061 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
062 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
063 * os.closeArchiveEntry();
064 *
065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
066 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
067 * os.closeArchiveEntry();
068 * os.close();
069 * </pre>
070 *
071 * Decompressing a ZIP-File:
072 *
073 * <pre>
074 * final InputStream is = Files.newInputStream(input.toPath());
075 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
076 * ZipArchiveEntry entry = (ZipArchiveEntry) in.getNextEntry();
077 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
078 * IOUtils.copy(in, out);
079 * out.close();
080 * in.close();
081 * </pre>
082 *
083 * @Immutable provided that the deprecated method setEntryEncoding is not used.
084 * @ThreadSafe even if the deprecated method setEntryEncoding is used
085 */
086public class ArchiveStreamFactory implements ArchiveStreamProvider {
087
    /** Number of bytes {@link #detect(InputStream)} reads when probing the stream for a TAR header. */
    private static final int TAR_HEADER_SIZE = 512;

    /** Limit used by {@link #detect(InputStream)} on how many leading directory entries it skips while probing for a TAR archive. */
    private static final int TAR_TEST_ENTRY_COUNT = 10;

    /** Number of bytes {@link #detect(InputStream)} reads when probing the stream for a DUMP signature. */
    private static final int DUMP_SIGNATURE_SIZE = 32;

    /** Number of bytes {@link #detect(InputStream)} reads for its first round of signature checks (ZIP, JAR, AR, CPIO, ARJ, 7z). */
    private static final int SIGNATURE_SIZE = 12;
095
    /**
     * A shared instance created with the no-argument constructor, that is, without an explicit entry encoding.
     * <p>
     * The constructors of this class are public, so this is a convenience instance rather than the only possible one. Because the deprecated
     * {@link #setEntryEncoding(String)} is public, calling it on this instance changes the encoding seen by every user of {@code DEFAULT}.
     * </p>
     *
     * @since 1.21
     */
    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();
102
    /**
     * Constant (value {@value}) used to identify the APK archive format.
     * <p>
     * APK file extensions are .apk, .xapk, .apks, .apkm
     * </p>
     *
     * @since 1.22
     */
    public static final String APK = "apk";

    /**
     * Constant (value {@value}) used to identify the XAPK archive format.
     * <p>
     * APK-family file extensions are .apk, .xapk, .apks, .apkm
     * </p>
     *
     * @since 1.22
     */
    public static final String XAPK = "xapk";

    /**
     * Constant (value {@value}) used to identify the APKS archive format.
     * <p>
     * APK-family file extensions are .apk, .xapk, .apks, .apkm
     * </p>
     *
     * @since 1.22
     */
    public static final String APKS = "apks";

    /**
     * Constant (value {@value}) used to identify the APKM archive format.
     * <p>
     * APK-family file extensions are .apk, .xapk, .apks, .apkm
     * </p>
     *
     * @since 1.22
     */
    public static final String APKM = "apkm";

    /**
     * Constant (value {@value}) used to identify the AR archive format.
     *
     * @since 1.1
     */
    public static final String AR = "ar";

    /**
     * Constant (value {@value}) used to identify the ARJ archive format. Not supported as an output stream type.
     *
     * @since 1.6
     */
    public static final String ARJ = "arj";

    /**
     * Constant (value {@value}) used to identify the CPIO archive format.
     *
     * @since 1.1
     */
    public static final String CPIO = "cpio";

    /**
     * Constant (value {@value}) used to identify the Unix DUMP archive format. Not supported as an output stream type.
     *
     * @since 1.3
     */
    public static final String DUMP = "dump";

    /**
     * Constant (value {@value}) used to identify the JAR archive format.
     *
     * @since 1.1
     */
    public static final String JAR = "jar";

    /**
     * Constant (value {@value}) used to identify the TAR archive format.
     *
     * @since 1.1
     */
    public static final String TAR = "tar";

    /**
     * Constant (value {@value}) used to identify the ZIP archive format.
     *
     * @since 1.1
     */
    public static final String ZIP = "zip";

    /**
     * Constant (value {@value}) used to identify the 7z archive format. The stream factory methods of this class reject this name with a
     * {@link StreamingNotSupportedException}.
     *
     * @since 1.8
     */
    public static final String SEVEN_Z = "7z";
198
199    private static Iterable<ArchiveStreamProvider> archiveStreamProviderIterable() {
200        return ServiceLoader.load(ArchiveStreamProvider.class, ClassLoader.getSystemClassLoader());
201    }
202
203    /**
204     * Try to determine the type of Archiver
205     *
206     * @param in input stream
207     * @return type of archiver if found
208     * @throws ArchiveException if an archiver cannot be detected in the stream
209     * @since 1.14
210     */
211    public static String detect(final InputStream in) throws ArchiveException {
212        if (in == null) {
213            throw new IllegalArgumentException("Stream must not be null.");
214        }
215        if (!in.markSupported()) {
216            throw new IllegalArgumentException("Mark is not supported.");
217        }
218        final byte[] signature = new byte[SIGNATURE_SIZE];
219        in.mark(signature.length);
220        int signatureLength = -1;
221        try {
222            signatureLength = IOUtils.readFully(in, signature);
223            in.reset();
224        } catch (final IOException e) {
225            throw new ArchiveException("Failure reading signature.", (Throwable) e);
226        }
227        // For now JAR files are detected as ZIP files.
228        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
229            return ZIP;
230        }
231        // For now JAR files are detected as ZIP files.
232        if (JarArchiveInputStream.matches(signature, signatureLength)) {
233            return JAR;
234        }
235        if (ArArchiveInputStream.matches(signature, signatureLength)) {
236            return AR;
237        }
238        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
239            return CPIO;
240        }
241        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
242            return ARJ;
243        }
244        if (SevenZFile.matches(signature, signatureLength)) {
245            return SEVEN_Z;
246        }
247        // Dump needs a bigger buffer to check the signature;
248        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
249        in.mark(dumpsig.length);
250        try {
251            signatureLength = IOUtils.readFully(in, dumpsig);
252            in.reset();
253        } catch (final IOException e) {
254            throw new ArchiveException("IOException while reading dump signature", (Throwable) e);
255        }
256        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
257            return DUMP;
258        }
259        // Tar needs an even bigger buffer to check the signature; read the first block
260        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
261        in.mark(tarHeader.length);
262        try {
263            signatureLength = IOUtils.readFully(in, tarHeader);
264            in.reset();
265        } catch (final IOException e) {
266            throw new ArchiveException("IOException while reading tar signature", (Throwable) e);
267        }
268        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
269            return TAR;
270        }
271        // COMPRESS-117
272        if (signatureLength >= TAR_HEADER_SIZE) {
273            try (TarArchiveInputStream inputStream = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader))) {
274                // COMPRESS-191 - verify the header checksum
275                TarArchiveEntry entry = inputStream.getNextEntry();
276                // try to find the first non-directory entry within the first 10 entries.
277                int count = 0;
278                while (entry != null && entry.isDirectory() && entry.isCheckSumOK() && count++ < TAR_TEST_ENTRY_COUNT) {
279                    entry = inputStream.getNextEntry();
280                }
281                if (entry != null && entry.isCheckSumOK() && !entry.isDirectory() && isName(entry.getGroupName()) && isName(entry.getName())
282                        && isName(entry.getUserName()) || count > 0) {
283                    return TAR;
284                }
285            } catch (final Exception ignored) {
286                // can generate IllegalArgumentException as well as IOException auto-detection, simply not a TAR ignored
287            }
288        }
289        throw new ArchiveException("No Archiver found for the stream signature");
290    }
291
292    /**
293     * Constructs a new sorted map from input stream provider names to provider objects.
294     *
295     * <p>
296     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
297     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
298     * </p>
299     *
300     * <p>
301     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
302     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
303     * </p>
304     *
305     * <p>
306     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
307     * </p>
308     *
309     * @return An immutable, map from names to provider objects
310     * @since 1.13
311     */
312    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
313        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
314            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
315            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
316            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getInputStreamArchiveNames(), provider, map));
317            return map;
318        });
319    }
320
321    /**
322     * Constructs a new sorted map from output stream provider names to provider objects.
323     *
324     * <p>
325     * The map returned by this method will have one entry for each provider for which support is available in the current Java virtual machine. If two or more
326     * supported provider have the same name then the resulting map will contain just one of them; which one it will contain is not specified.
327     * </p>
328     *
329     * <p>
330     * The invocation of this method, and the subsequent use of the resulting map, may cause time-consuming disk or network I/O operations to occur. This method
331     * is provided for applications that need to enumerate all of the available providers, for example to allow user provider selection.
332     * </p>
333     *
334     * <p>
335     * This method may return different results at different times if new providers are dynamically made available to the current Java virtual machine.
336     * </p>
337     *
338     * @return An immutable, map from names to provider objects
339     * @since 1.13
340     */
341    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
342        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
343            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
344            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
345            archiveStreamProviderIterable().forEach(provider -> putAll(provider.getOutputStreamArchiveNames(), provider, map));
346            return map;
347        });
348    }
349
350    private static boolean isName(final String value) {
351        // Expect ASCII https://www.mkssoftware.com/docs/man4/tar.4.asp
352        return value.isEmpty() || value.chars().allMatch(ch -> ch > 31 && ch < 128);
353    }
354
355    static void putAll(final Set<String> names, final ArchiveStreamProvider provider, final TreeMap<String, ArchiveStreamProvider> map) {
356        names.forEach(name -> map.put(toKey(name), provider));
357    }
358
359    private static String toKey(final String name) {
360        return StringUtils.toRootUpperCase(name);
361    }
362
    /**
     * Entry encoding, null for the default. Set by the constructor and still writable afterwards through the deprecated
     * {@link #setEntryEncoding(String)}.
     */
    private volatile String entryEncoding;

    /** Cache filled on the first call to {@link #getArchiveInputStreamProviders()}; {@code null} until then. */
    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;

    /** Cache filled on the first call to {@link #getArchiveOutputStreamProviders()}; {@code null} until then. */
    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
371
    /**
     * Constructs an instance without an explicit entry encoding: {@code null} is passed on, which means the default encoding is used.
     */
    public ArchiveStreamFactory() {
        this(null);
    }
378
    /**
     * Constructs an instance using the specified encoding.
     *
     * @param entryEncoding the encoding to be used, or {@code null} for the default.
     * @since 1.10
     */
    public ArchiveStreamFactory(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
388
389    /**
390     * Creates an archive input stream from an input stream, autodetecting the archive type from the first few bytes of the stream. The InputStream must support
391     * marks, like BufferedInputStream.
392     *
393     * @param <I> The {@link ArchiveInputStream} type.
394     * @param in  the input stream
395     * @return the archive input stream
396     * @throws ArchiveException               if the archiver name is not known
397     * @throws StreamingNotSupportedException if the format cannot be read from a stream
398     * @throws IllegalArgumentException       if the stream is null or does not support mark
399     */
400    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final InputStream in) throws ArchiveException {
401        return createArchiveInputStream(detect(in), in);
402    }
403
404    /**
405     * Creates an archive input stream from an archiver name and an input stream.
406     *
407     * @param <I>          The {@link ArchiveInputStream} type.
408     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or
409     *                     {@value #SEVEN_Z}
410     * @param in           the input stream
411     * @return the archive input stream
412     * @throws ArchiveException               if the archiver name is not known
413     * @throws StreamingNotSupportedException if the format cannot be read from a stream
414     * @throws IllegalArgumentException       if the archiver name or stream is null
415     */
416    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in)
417            throws ArchiveException {
418        return createArchiveInputStream(archiverName, in, entryEncoding);
419    }
420
421    @SuppressWarnings("unchecked")
422    @Override
423    public <I extends ArchiveInputStream<? extends ArchiveEntry>> I createArchiveInputStream(final String archiverName, final InputStream in,
424            final String actualEncoding) throws ArchiveException {
425        if (archiverName == null) {
426            throw new IllegalArgumentException("Archiver name must not be null.");
427        }
428        if (in == null) {
429            throw new IllegalArgumentException("InputStream must not be null.");
430        }
431        if (AR.equalsIgnoreCase(archiverName)) {
432            return (I) new ArArchiveInputStream(in);
433        }
434        if (ARJ.equalsIgnoreCase(archiverName)) {
435            if (actualEncoding != null) {
436                return (I) new ArjArchiveInputStream(in, actualEncoding);
437            }
438            return (I) new ArjArchiveInputStream(in);
439        }
440        if (ZIP.equalsIgnoreCase(archiverName)) {
441            if (actualEncoding != null) {
442                return (I) new ZipArchiveInputStream(in, actualEncoding);
443            }
444            return (I) new ZipArchiveInputStream(in);
445        }
446        if (TAR.equalsIgnoreCase(archiverName)) {
447            if (actualEncoding != null) {
448                return (I) new TarArchiveInputStream(in, actualEncoding);
449            }
450            return (I) new TarArchiveInputStream(in);
451        }
452        if (JAR.equalsIgnoreCase(archiverName) || APK.equalsIgnoreCase(archiverName)) {
453            if (actualEncoding != null) {
454                return (I) new JarArchiveInputStream(in, actualEncoding);
455            }
456            return (I) new JarArchiveInputStream(in);
457        }
458        if (CPIO.equalsIgnoreCase(archiverName)) {
459            if (actualEncoding != null) {
460                return (I) new CpioArchiveInputStream(in, actualEncoding);
461            }
462            return (I) new CpioArchiveInputStream(in);
463        }
464        if (DUMP.equalsIgnoreCase(archiverName)) {
465            if (actualEncoding != null) {
466                return (I) new DumpArchiveInputStream(in, actualEncoding);
467            }
468            return (I) new DumpArchiveInputStream(in);
469        }
470        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
471            throw new StreamingNotSupportedException(SEVEN_Z);
472        }
473        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
474        if (archiveStreamProvider != null) {
475            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
476        }
477        throw new ArchiveException("Archiver: " + archiverName + " not found.");
478    }
479
480    /**
481     * Creates an archive output stream from an archiver name and an output stream.
482     *
483     * @param <O>          The {@link ArchiveOutputStream} type.
484     * @param archiverName the archive name, i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
485     * @param out          the output stream
486     * @return the archive output stream
487     * @throws ArchiveException               if the archiver name is not known
488     * @throws StreamingNotSupportedException if the format cannot be written to a stream
489     * @throws IllegalArgumentException       if the archiver name or stream is null
490     */
491    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out)
492            throws ArchiveException {
493        return createArchiveOutputStream(archiverName, out, entryEncoding);
494    }
495
496    @SuppressWarnings("unchecked")
497    @Override
498    public <O extends ArchiveOutputStream<? extends ArchiveEntry>> O createArchiveOutputStream(final String archiverName, final OutputStream out,
499            final String actualEncoding) throws ArchiveException {
500        if (archiverName == null) {
501            throw new IllegalArgumentException("Archiver name must not be null.");
502        }
503        if (out == null) {
504            throw new IllegalArgumentException("OutputStream must not be null.");
505        }
506        if (AR.equalsIgnoreCase(archiverName)) {
507            return (O) new ArArchiveOutputStream(out);
508        }
509        if (ZIP.equalsIgnoreCase(archiverName)) {
510            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
511            if (actualEncoding != null) {
512                zip.setEncoding(actualEncoding);
513            }
514            return (O) zip;
515        }
516        if (TAR.equalsIgnoreCase(archiverName)) {
517            if (actualEncoding != null) {
518                return (O) new TarArchiveOutputStream(out, actualEncoding);
519            }
520            return (O) new TarArchiveOutputStream(out);
521        }
522        if (JAR.equalsIgnoreCase(archiverName)) {
523            if (actualEncoding != null) {
524                return (O) new JarArchiveOutputStream(out, actualEncoding);
525            }
526            return (O) new JarArchiveOutputStream(out);
527        }
528        if (CPIO.equalsIgnoreCase(archiverName)) {
529            if (actualEncoding != null) {
530                return (O) new CpioArchiveOutputStream(out, actualEncoding);
531            }
532            return (O) new CpioArchiveOutputStream(out);
533        }
534        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
535            throw new StreamingNotSupportedException(SEVEN_Z);
536        }
537        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
538        if (archiveStreamProvider != null) {
539            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
540        }
541        throw new ArchiveException("Archiver: " + archiverName + " not found.");
542    }
543
544    /**
545     * Gets an unmodifiable sorted map from input stream provider names to provider objects.
546     *
547     * @return an unmodifiable sorted map of from input stream provider names to provider objects.
548     * @see #findAvailableArchiveInputStreamProviders()
549     */
550    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
551        if (archiveInputStreamProviders == null) {
552            archiveInputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
553        }
554        return archiveInputStreamProviders;
555    }
556
557    /**
558     * Gets an unmodifiable sorted map from output stream provider names to provider objects.
559     *
560     * @return an unmodifiable sorted map of from input stream provider names to provider objects.
561     * @see #findAvailableArchiveInputStreamProviders()
562     */
563    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
564        if (archiveOutputStreamProviders == null) {
565            archiveOutputStreamProviders = Collections.unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
566        }
567        return archiveOutputStreamProviders;
568    }
569
    /**
     * Gets the encoding to use for arj, jar, ZIP, dump, cpio and tar files, or null for the archiver default.
     * <p>
     * This is the value passed to the constructor unless it was later replaced through the deprecated {@link #setEntryEncoding(String)}.
     * </p>
     *
     * @return entry encoding, or null for the archiver default
     * @since 1.5
     */
    public String getEntryEncoding() {
        return entryEncoding;
    }
579
    /**
     * Gets the archive names this factory declares for input streams: {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR},
     * {@value #CPIO}, {@value #DUMP} and {@value #SEVEN_Z}.
     * <p>
     * Note that {@value #SEVEN_Z} is declared here although creating an input stream for it throws a {@link StreamingNotSupportedException}, and that
     * {@value #APK} is accepted when creating an input stream but is not declared here.
     * </p>
     *
     * @return a newly created set of archive names
     */
    @Override
    public Set<String> getInputStreamArchiveNames() {
        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
    }
584
    /**
     * Gets the archive names this factory declares for output streams: {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO} and
     * {@value #SEVEN_Z}.
     * <p>
     * Note that {@value #SEVEN_Z} is declared here although creating an output stream for it throws a {@link StreamingNotSupportedException}.
     * </p>
     *
     * @return a newly created set of archive names
     */
    @Override
    public Set<String> getOutputStreamArchiveNames() {
        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
    }
589
    /**
     * Sets the encoding to use for arj, jar, ZIP, dump, cpio and tar files. Use null for the archiver default.
     * <p>
     * The new value is used by later calls to the two-argument create methods of this instance; streams that were already created are not touched by
     * this method.
     * </p>
     *
     * @param entryEncoding the entry encoding, null uses the archiver default.
     * @since 1.5
     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
     */
    @Deprecated
    public void setEntryEncoding(final String entryEncoding) {
        this.entryEncoding = entryEncoding;
    }
601
602}