001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers;
020
021import java.io.ByteArrayInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.security.AccessController;
026import java.security.PrivilegedAction;
027import java.util.ArrayList;
028import java.util.Collections;
029import java.util.Iterator;
030import java.util.Locale;
031import java.util.Set;
032import java.util.SortedMap;
033import java.util.TreeMap;
034
035import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
036import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
037import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
038import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
039import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
040import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
041import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
042import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
043import org.apache.commons.compress.archivers.sevenz.SevenZFile;
044import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
045import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
046import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
047import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
048import org.apache.commons.compress.utils.IOUtils;
049import org.apache.commons.compress.utils.Lists;
050import org.apache.commons.compress.utils.ServiceLoaderIterator;
051import org.apache.commons.compress.utils.Sets;
052
053/**
054 * Factory to create Archive[In|Out]putStreams from names or the first bytes of
055 * the InputStream. In order to add other implementations, you should extend
056 * ArchiveStreamFactory and override the appropriate methods (and call their
057 * implementation from super of course).
058 *
059 * Compressing a ZIP-File:
060 *
061 * <pre>
062 * final OutputStream out = Files.newOutputStream(output.toPath());
063 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
064 *
065 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
066 * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
067 * os.closeArchiveEntry();
068 *
069 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
070 * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
071 * os.closeArchiveEntry();
072 * os.close();
073 * </pre>
074 *
075 * Decompressing a ZIP-File:
076 *
077 * <pre>
078 * final InputStream is = Files.newInputStream(input.toPath());
079 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
080 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
081 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
082 * IOUtils.copy(in, out);
083 * out.close();
084 * in.close();
085 * </pre>
086 * @Immutable provided that the deprecated method setEntryEncoding is not used.
087 * @ThreadSafe even if the deprecated method setEntryEncoding is used
088 */
089public class ArchiveStreamFactory implements ArchiveStreamProvider {
090
    /**
     * Number of bytes {@link #detect(InputStream)} reads to test for a tar
     * header (the first block of the stream).
     */
    private static final int TAR_HEADER_SIZE = 512;

    /**
     * Number of bytes {@link #detect(InputStream)} reads to test for a dump
     * signature.
     */
    private static final int DUMP_SIGNATURE_SIZE = 32;

    /**
     * Number of bytes {@link #detect(InputStream)} reads for the first round of
     * signature checks (zip, jar, ar, cpio, arj and 7z).
     */
    private static final int SIGNATURE_SIZE = 12;

    /**
     * The singleton instance using the platform default encoding.
     * @since 1.21
     */
    public static final ArchiveStreamFactory DEFAULT = new ArchiveStreamFactory();

    /**
     * Constant (value {@value}) used to identify the AR archive format.
     * @since 1.1
     */
    public static final String AR = "ar";

    /**
     * Constant (value {@value}) used to identify the ARJ archive format.
     * Not supported as an output stream type.
     * @since 1.6
     */
    public static final String ARJ = "arj";

    /**
     * Constant (value {@value}) used to identify the CPIO archive format.
     * @since 1.1
     */
    public static final String CPIO = "cpio";

    /**
     * Constant (value {@value}) used to identify the Unix DUMP archive format.
     * Not supported as an output stream type.
     * @since 1.3
     */
    public static final String DUMP = "dump";

    /**
     * Constant (value {@value}) used to identify the JAR archive format.
     * @since 1.1
     */
    public static final String JAR = "jar";

    /**
     * Constant (value {@value}) used to identify the TAR archive format.
     * @since 1.1
     */
    public static final String TAR = "tar";

    /**
     * Constant (value {@value}) used to identify the ZIP archive format.
     * @since 1.1
     */
    public static final String ZIP = "zip";

    /**
     * Constant (value {@value}) used to identify the 7z archive format.
     * @since 1.8
     */
    public static final String SEVEN_Z = "7z";
152
    /**
     * Entry encoding given to the constructor, null for the platform default.
     * When non-null, {@link #setEntryEncoding(String)} refuses to change the
     * encoding.
     */
    private final String encoding;

    /**
     * Entry encoding handed to the create methods that take no explicit
     * encoding, null for the default. Initialised from the constructor argument
     * and replaceable through the deprecated {@link #setEntryEncoding(String)}.
     */
    private volatile String entryEncoding;

    /**
     * Cached result of {@link #getArchiveInputStreamProviders()}; null until
     * that method has been called for the first time.
     */
    private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;

    /**
     * Cached result of {@link #getArchiveOutputStreamProviders()}; null until
     * that method has been called for the first time.
     */
    private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
166
167    private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() {
168        return Lists.newArrayList(serviceLoaderIterator());
169    }
170
171    static void putAll(final Set<String> names, final ArchiveStreamProvider provider,
172            final TreeMap<String, ArchiveStreamProvider> map) {
173        for (final String name : names) {
174            map.put(toKey(name), provider);
175        }
176    }
177
178    private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() {
179        return new ServiceLoaderIterator<>(ArchiveStreamProvider.class);
180    }
181
182    private static String toKey(final String name) {
183        return name.toUpperCase(Locale.ROOT);
184    }
185
186    /**
187     * Constructs a new sorted map from input stream provider names to provider
188     * objects.
189     *
190     * <p>
191     * The map returned by this method will have one entry for each provider for
192     * which support is available in the current Java virtual machine. If two or
193     * more supported provider have the same name then the resulting map will
194     * contain just one of them; which one it will contain is not specified.
195     * </p>
196     *
197     * <p>
198     * The invocation of this method, and the subsequent use of the resulting
199     * map, may cause time-consuming disk or network I/O operations to occur.
200     * This method is provided for applications that need to enumerate all of
201     * the available providers, for example to allow user provider selection.
202     * </p>
203     *
204     * <p>
205     * This method may return different results at different times if new
206     * providers are dynamically made available to the current Java virtual
207     * machine.
208     * </p>
209     *
210     * @return An immutable, map from names to provider objects
211     * @since 1.13
212     */
213    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
214        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
215            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
216            putAll(DEFAULT.getInputStreamArchiveNames(), DEFAULT, map);
217            for (final ArchiveStreamProvider provider : findArchiveStreamProviders()) {
218                putAll(provider.getInputStreamArchiveNames(), provider, map);
219            }
220            return map;
221        });
222    }
223
224    /**
225     * Constructs a new sorted map from output stream provider names to provider
226     * objects.
227     *
228     * <p>
229     * The map returned by this method will have one entry for each provider for
230     * which support is available in the current Java virtual machine. If two or
231     * more supported provider have the same name then the resulting map will
232     * contain just one of them; which one it will contain is not specified.
233     * </p>
234     *
235     * <p>
236     * The invocation of this method, and the subsequent use of the resulting
237     * map, may cause time-consuming disk or network I/O operations to occur.
238     * This method is provided for applications that need to enumerate all of
239     * the available providers, for example to allow user provider selection.
240     * </p>
241     *
242     * <p>
243     * This method may return different results at different times if new
244     * providers are dynamically made available to the current Java virtual
245     * machine.
246     * </p>
247     *
248     * @return An immutable, map from names to provider objects
249     * @since 1.13
250     */
251    public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
252        return AccessController.doPrivileged((PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>) () -> {
253            final TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
254            putAll(DEFAULT.getOutputStreamArchiveNames(), DEFAULT, map);
255            for (final ArchiveStreamProvider provider : findArchiveStreamProviders()) {
256                putAll(provider.getOutputStreamArchiveNames(), provider, map);
257            }
258            return map;
259        });
260    }
261
262    /**
263     * Create an instance using the platform default encoding.
264     */
265    public ArchiveStreamFactory() {
266        this(null);
267    }
268
269    /**
270     * Create an instance using the specified encoding.
271     *
272     * @param encoding the encoding to be used.
273     *
274     * @since 1.10
275     */
276    public ArchiveStreamFactory(final String encoding) {
277        this.encoding = encoding;
278        // Also set the original field so can continue to use it.
279        this.entryEncoding = encoding;
280    }
281
282    /**
283     * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
284     * files, or null for the archiver default.
285     *
286     * @return entry encoding, or null for the archiver default
287     * @since 1.5
288     */
289    public String getEntryEncoding() {
290        return entryEncoding;
291    }
292
293    /**
294     * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
295     *
296     * @param entryEncoding the entry encoding, null uses the archiver default.
297     * @since 1.5
298     * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
299     * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
300     * was used to specify the factory encoding.
301     */
302    @Deprecated
303    public void setEntryEncoding(final String entryEncoding) {
304        // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
305        if (encoding != null) {
306            throw new IllegalStateException("Cannot overide encoding set by the constructor");
307        }
308        this.entryEncoding = entryEncoding;
309    }
310
311    /**
312     * Creates an archive input stream from an archiver name and an input stream.
313     *
314     * @param archiverName the archive name,
315     * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
316     * @param in the input stream
317     * @return the archive input stream
318     * @throws ArchiveException if the archiver name is not known
319     * @throws StreamingNotSupportedException if the format cannot be
320     * read from a stream
321     * @throws IllegalArgumentException if the archiver name or stream is null
322     */
323    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in)
324            throws ArchiveException {
325        return createArchiveInputStream(archiverName, in, entryEncoding);
326    }
327
328    @Override
329    public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
330            final String actualEncoding) throws ArchiveException {
331
332        if (archiverName == null) {
333            throw new IllegalArgumentException("Archivername must not be null.");
334        }
335
336        if (in == null) {
337            throw new IllegalArgumentException("InputStream must not be null.");
338        }
339
340        if (AR.equalsIgnoreCase(archiverName)) {
341            return new ArArchiveInputStream(in);
342        }
343        if (ARJ.equalsIgnoreCase(archiverName)) {
344            if (actualEncoding != null) {
345                return new ArjArchiveInputStream(in, actualEncoding);
346            }
347            return new ArjArchiveInputStream(in);
348        }
349        if (ZIP.equalsIgnoreCase(archiverName)) {
350            if (actualEncoding != null) {
351                return new ZipArchiveInputStream(in, actualEncoding);
352            }
353            return new ZipArchiveInputStream(in);
354        }
355        if (TAR.equalsIgnoreCase(archiverName)) {
356            if (actualEncoding != null) {
357                return new TarArchiveInputStream(in, actualEncoding);
358            }
359            return new TarArchiveInputStream(in);
360        }
361        if (JAR.equalsIgnoreCase(archiverName)) {
362            if (actualEncoding != null) {
363                return new JarArchiveInputStream(in, actualEncoding);
364            }
365            return new JarArchiveInputStream(in);
366        }
367        if (CPIO.equalsIgnoreCase(archiverName)) {
368            if (actualEncoding != null) {
369                return new CpioArchiveInputStream(in, actualEncoding);
370            }
371            return new CpioArchiveInputStream(in);
372        }
373        if (DUMP.equalsIgnoreCase(archiverName)) {
374            if (actualEncoding != null) {
375                return new DumpArchiveInputStream(in, actualEncoding);
376            }
377            return new DumpArchiveInputStream(in);
378        }
379        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
380            throw new StreamingNotSupportedException(SEVEN_Z);
381        }
382
383        final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
384        if (archiveStreamProvider != null) {
385            return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
386        }
387
388        throw new ArchiveException("Archiver: " + archiverName + " not found.");
389    }
390
391    /**
392     * Creates an archive output stream from an archiver name and an output stream.
393     *
394     * @param archiverName the archive name,
395     * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
396     * @param out the output stream
397     * @return the archive output stream
398     * @throws ArchiveException if the archiver name is not known
399     * @throws StreamingNotSupportedException if the format cannot be
400     * written to a stream
401     * @throws IllegalArgumentException if the archiver name or stream is null
402     */
403    public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
404            throws ArchiveException {
405        return createArchiveOutputStream(archiverName, out, entryEncoding);
406    }
407
408    @Override
409    public ArchiveOutputStream createArchiveOutputStream(
410            final String archiverName, final OutputStream out, final String actualEncoding)
411            throws ArchiveException {
412        if (archiverName == null) {
413            throw new IllegalArgumentException("Archivername must not be null.");
414        }
415        if (out == null) {
416            throw new IllegalArgumentException("OutputStream must not be null.");
417        }
418
419        if (AR.equalsIgnoreCase(archiverName)) {
420            return new ArArchiveOutputStream(out);
421        }
422        if (ZIP.equalsIgnoreCase(archiverName)) {
423            final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
424            if (actualEncoding != null) {
425                zip.setEncoding(actualEncoding);
426            }
427            return zip;
428        }
429        if (TAR.equalsIgnoreCase(archiverName)) {
430            if (actualEncoding != null) {
431                return new TarArchiveOutputStream(out, actualEncoding);
432            }
433            return new TarArchiveOutputStream(out);
434        }
435        if (JAR.equalsIgnoreCase(archiverName)) {
436            if (actualEncoding != null) {
437                return new JarArchiveOutputStream(out, actualEncoding);
438            }
439            return new JarArchiveOutputStream(out);
440        }
441        if (CPIO.equalsIgnoreCase(archiverName)) {
442            if (actualEncoding != null) {
443                return new CpioArchiveOutputStream(out, actualEncoding);
444            }
445            return new CpioArchiveOutputStream(out);
446        }
447        if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
448            throw new StreamingNotSupportedException(SEVEN_Z);
449        }
450
451        final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
452        if (archiveStreamProvider != null) {
453            return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
454        }
455
456        throw new ArchiveException("Archiver: " + archiverName + " not found.");
457    }
458
459    /**
460     * Create an archive input stream from an input stream, autodetecting
461     * the archive type from the first few bytes of the stream. The InputStream
462     * must support marks, like BufferedInputStream.
463     *
464     * @param in the input stream
465     * @return the archive input stream
466     * @throws ArchiveException if the archiver name is not known
467     * @throws StreamingNotSupportedException if the format cannot be
468     * read from a stream
469     * @throws IllegalArgumentException if the stream is null or does not support mark
470     */
471    public ArchiveInputStream createArchiveInputStream(final InputStream in)
472            throws ArchiveException {
473        return createArchiveInputStream(detect(in), in);
474    }
475
476    /**
477     * Try to determine the type of Archiver
478     * @param in input stream
479     * @return type of archiver if found
480     * @throws ArchiveException if an archiver cannot be detected in the stream
481     * @since 1.14
482     */
483    public static String detect(final InputStream in) throws ArchiveException {
484        if (in == null) {
485            throw new IllegalArgumentException("Stream must not be null.");
486        }
487
488        if (!in.markSupported()) {
489            throw new IllegalArgumentException("Mark is not supported.");
490        }
491
492        final byte[] signature = new byte[SIGNATURE_SIZE];
493        in.mark(signature.length);
494        int signatureLength = -1;
495        try {
496            signatureLength = IOUtils.readFully(in, signature);
497            in.reset();
498        } catch (final IOException e) {
499            throw new ArchiveException("IOException while reading signature.", e);
500        }
501
502        if (ZipArchiveInputStream.matches(signature, signatureLength)) {
503            return ZIP;
504        }
505        if (JarArchiveInputStream.matches(signature, signatureLength)) {
506            return JAR;
507        }
508        if (ArArchiveInputStream.matches(signature, signatureLength)) {
509            return AR;
510        }
511        if (CpioArchiveInputStream.matches(signature, signatureLength)) {
512            return CPIO;
513        }
514        if (ArjArchiveInputStream.matches(signature, signatureLength)) {
515            return ARJ;
516        }
517        if (SevenZFile.matches(signature, signatureLength)) {
518            return SEVEN_Z;
519        }
520
521        // Dump needs a bigger buffer to check the signature;
522        final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
523        in.mark(dumpsig.length);
524        try {
525            signatureLength = IOUtils.readFully(in, dumpsig);
526            in.reset();
527        } catch (final IOException e) {
528            throw new ArchiveException("IOException while reading dump signature", e);
529        }
530        if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
531            return DUMP;
532        }
533
534        // Tar needs an even bigger buffer to check the signature; read the first block
535        final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
536        in.mark(tarHeader.length);
537        try {
538            signatureLength = IOUtils.readFully(in, tarHeader);
539            in.reset();
540        } catch (final IOException e) {
541            throw new ArchiveException("IOException while reading tar signature", e);
542        }
543        if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
544            return TAR;
545        }
546
547        // COMPRESS-117 - improve auto-recognition
548        if (signatureLength >= TAR_HEADER_SIZE) {
549            TarArchiveInputStream tais = null;
550            try {
551                tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
552                // COMPRESS-191 - verify the header checksum
553                if (tais.getNextTarEntry().isCheckSumOK()) {
554                    return TAR;
555                }
556            } catch (final Exception e) { // NOPMD NOSONAR
557                // can generate IllegalArgumentException as well
558                // as IOException
559                // autodetection, simply not a TAR
560                // ignored
561            } finally {
562                IOUtils.closeQuietly(tais);
563            }
564        }
565        throw new ArchiveException("No Archiver found for the stream signature");
566    }
567
568    public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
569        if (archiveInputStreamProviders == null) {
570            archiveInputStreamProviders = Collections
571                    .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
572        }
573        return archiveInputStreamProviders;
574    }
575
576    public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
577        if (archiveOutputStreamProviders == null) {
578            archiveOutputStreamProviders = Collections
579                    .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
580        }
581        return archiveOutputStreamProviders;
582    }
583
584    @Override
585    public Set<String> getInputStreamArchiveNames() {
586        return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
587    }
588
589    @Override
590    public Set<String> getOutputStreamArchiveNames() {
591        return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
592    }
593
594}