/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Objects;
import java.util.stream.Stream;

import org.apache.commons.compress.AbstractTest;
import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.io.input.BrokenInputStream;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
52  
53  public class ArchiveStreamFactoryTest extends AbstractTest {
54  
55      static class TestData {
56          final String testFile;
57          final String expectedEncoding;
58          final ArchiveStreamFactory fac;
59          final String fieldName;
60          final String type;
61          final boolean hasOutputStream;
62  
63          TestData(final String testFile, final String type, final boolean hasOut, final String expectedEncoding, final ArchiveStreamFactory fac,
64                  final String fieldName) {
65              this.testFile = testFile;
66              this.expectedEncoding = expectedEncoding;
67              this.fac = fac;
68              this.fieldName = fieldName;
69              this.type = type;
70              this.hasOutputStream = hasOut;
71          }
72  
73          @Override
74          public String toString() {
75              return "TestData [testFile=" + testFile + ", expectedEncoding=" + expectedEncoding + ", fac=" + fac + ", fieldName=" + fieldName + ", type=" + type
76                      + ", hasOutputStream=" + hasOutputStream + "]";
77          }
78      }
79  
80      private static final String UNKNOWN = "??";
81  
82      private static final ArchiveStreamFactory FACTORY = ArchiveStreamFactory.DEFAULT;
83  
84      private static final ArchiveStreamFactory FACTORY_UTF8 = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
85  
86      private static final ArchiveStreamFactory FACTORY_ASCII = new ArchiveStreamFactory(StandardCharsets.US_ASCII.name());
87  
88      private static final ArchiveStreamFactory FACTORY_SET_UTF8 = getFactory(StandardCharsets.UTF_8.name());
89  
90      private static final ArchiveStreamFactory FACTORY_SET_ASCII = getFactory(StandardCharsets.US_ASCII.name());
91  
92      /**
93       * Default encoding if none is provided (not even null). The test currently assumes that the output default is the same as the input default.
94       */
95      private static final String ARJ_DEFAULT;
96      private static final String DUMP_DEFAULT;
97      private static final String ZIP_DEFAULT = getCharsetName(new ZipArchiveInputStream(null));
98      private static final String CPIO_DEFAULT = getCharsetName(new CpioArchiveInputStream(null));
99      private static final String TAR_DEFAULT = getCharsetName(new TarArchiveInputStream(null));
100     private static final String JAR_DEFAULT = getCharsetName(new JarArchiveInputStream(null));
101 
102     static {
103         String dflt;
104         dflt = UNKNOWN;
105         try (ArjArchiveInputStream inputStream = new ArjArchiveInputStream(newInputStream("bla.arj"))) {
106             dflt = getCharsetName(inputStream);
107         } catch (final Exception e) {
108             e.printStackTrace();
109         }
110         ARJ_DEFAULT = dflt;
111         dflt = UNKNOWN;
112         try (DumpArchiveInputStream inputStream = new DumpArchiveInputStream(newInputStream("bla.dump"))) {
113             dflt = getCharsetName(inputStream);
114         } catch (final Exception e) {
115             e.printStackTrace();
116         }
117         DUMP_DEFAULT = dflt;
118     }
119 
120     static final TestData[] TESTS = { new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, ARJ_DEFAULT, FACTORY, "charsetName"),
121             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
122             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
123             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
124             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
125 
126             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, CPIO_DEFAULT, FACTORY, "charsetName"),
127             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
128             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
129             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
130             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
131 
132             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, DUMP_DEFAULT, FACTORY, "charsetName"),
133             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
134             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
135             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
136             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
137 
138             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, TAR_DEFAULT, FACTORY, "charsetName"),
139             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
140             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
141             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
142             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
143 
144             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, JAR_DEFAULT, FACTORY, "charset"),
145             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charset"),
146             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charset"),
147             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charset"),
148             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charset"),
149 
150             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, ZIP_DEFAULT, FACTORY, "charset"),
151             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charset"),
152             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charset"),
153             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charset"),
154             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charset"), };
155 
156     /** equals allowing null. */
157     private static boolean eq(final String exp, final String act) {
158         if (exp == null) {
159             return act == null;
160         }
161         return exp.equals(act);
162     }
163 
164     private static String getCharsetName(final ArchiveInputStream<?> instance) {
165         return instance.getCharset().name();
166     }
167 
168     @SuppressWarnings("deprecation") // test of deprecated method
169     static ArchiveStreamFactory getFactory(final String entryEncoding) {
170         final ArchiveStreamFactory fac = new ArchiveStreamFactory();
171         fac.setEntryEncoding(entryEncoding);
172         return fac;
173     }
174 
175     private static String getFieldAsString(final Object instance, final String name) {
176         if (instance instanceof ArchiveInputStream) {
177             return getCharsetName((ArchiveInputStream<?>) instance);
178         }
179         try {
180             final Object object = FieldUtils.readField(instance, name, true);
181             if (object == null) {
182                 return null;
183             }
184             if (object instanceof String) {
185                 // For example "charsetName"
186                 return (String) object;
187             }
188             if (object instanceof Charset) {
189                 // For example "charset"
190                 return ((Charset) object).name();
191             }
192             // System.out.println("Wrong type: " + object.getClass().getCanonicalName() + " for " + name + " in class " + instance.getClass().getSimpleName());
193             return object.toString();
194         } catch (final IllegalAccessException e) {
195             System.out.println("Cannot find " + name + " in class " + instance.getClass().getSimpleName());
196             return UNKNOWN;
197         }
198     }
199 
200     public static Stream<Path> getIcoPathStream() throws IOException {
201         return Files.walk(Paths.get("src/test/resources/org/apache/commons/compress/ico")).filter(Files::isRegularFile);
202     }
203 
204     private String detect(final String resource) throws IOException, ArchiveException {
205         try (InputStream in = new BufferedInputStream(newInputStream(resource))) {
206             return ArchiveStreamFactory.detect(in);
207         }
208     }
209 
210     @SuppressWarnings("resource")
211     private <T extends ArchiveInputStream<? extends E>, E extends ArchiveEntry> T getInputStream(final String resource, final ArchiveStreamFactory factory)
212             throws IOException, ArchiveException {
213         return factory.createArchiveInputStream(new BufferedInputStream(newInputStream(resource)));
214     }
215 
216     @SuppressWarnings("resource")
217     private <T extends ArchiveInputStream<? extends E>, E extends ArchiveEntry> T getInputStream(final String type, final String resource,
218             final ArchiveStreamFactory factory) throws IOException, ArchiveException {
219         return factory.createArchiveInputStream(type, new BufferedInputStream(newInputStream(resource)));
220     }
221 
222     private <T extends ArchiveOutputStream<? extends E>, E extends ArchiveEntry> T getOutputStream(final String type, final ArchiveStreamFactory factory)
223             throws ArchiveException {
224         return factory.createArchiveOutputStream(type, new ByteArrayOutputStream());
225     }
226 
227     /**
228      * see https://issues.apache.org/jira/browse/COMPRESS-191
229      */
230     @Test
231     public void testAiffFilesAreNoTARs() throws Exception {
232         try (InputStream fis = newInputStream("testAIFF.aif");
233                 InputStream is = new BufferedInputStream(fis)) {
234             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(is),
235                     "created an input stream for a non-archive");
236             assertTrue(ae.getMessage().startsWith("No Archiver found"));
237         }
238     }
239 
240     @Test
241     public void testCantRead7zFromStream() throws Exception {
242         assertThrows(StreamingNotSupportedException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(ArchiveStreamFactory.SEVEN_Z,
243                 new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY)));
244     }
245 
246     @Test
247     public void testCantWrite7zToStream() throws Exception {
248         assertThrows(StreamingNotSupportedException.class,
249                 () -> ArchiveStreamFactory.DEFAULT.createArchiveOutputStream(ArchiveStreamFactory.SEVEN_Z, new ByteArrayOutputStream()));
250     }
251 
252     @Test
253     public void testCOMPRESS209() throws Exception {
254         try (InputStream fis = newInputStream("testCompress209.doc");
255                 InputStream bis = new BufferedInputStream(fis)) {
256             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis),
257                     "created an input stream for a non-archive");
258             assertTrue(ae.getMessage().startsWith("No Archiver found"));
259         }
260     }
261 
262     @Test
263     public void testDetect() throws Exception {
264         for (final String extension : new String[] { ArchiveStreamFactory.AR, ArchiveStreamFactory.ARJ, ArchiveStreamFactory.CPIO, ArchiveStreamFactory.DUMP,
265                 // Compress doesn't know how to detect JARs, see COMPRESS-91
266                 // ArchiveStreamFactory.JAR,
267                 ArchiveStreamFactory.SEVEN_Z, ArchiveStreamFactory.TAR, ArchiveStreamFactory.ZIP }) {
268             assertEquals(extension, detect("bla." + extension));
269         }
270 
271         final ArchiveException e1 = assertThrows(ArchiveException.class,
272                 () -> ArchiveStreamFactory.detect(new BufferedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY))),
273                 "shouldn't be able to detect empty stream");
274         assertEquals("No Archiver found for the stream signature", e1.getMessage());
275 
276         final IllegalArgumentException e2 = assertThrows(IllegalArgumentException.class, () -> ArchiveStreamFactory.detect(null),
277                 "shouldn't be able to detect null stream");
278         assertEquals("Stream must not be null.", e2.getMessage());
279 
280         final ArchiveException e3 = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.detect(new BufferedInputStream(new BrokenInputStream())),
281                 "Expected ArchiveException");
282         assertEquals("IOException while reading signature.", e3.getMessage());
283     }
284 
285     /**
286      * Test case for <a href="https://issues.apache.org/jira/browse/COMPRESS-267" >COMPRESS-267</a>.
287      */
288     @Test
289     public void testDetectsAndThrowsFor7z() throws Exception {
290         try (InputStream fis = newInputStream("bla.7z");
291                 InputStream bis = new BufferedInputStream(fis)) {
292             final StreamingNotSupportedException ex = assertThrows(StreamingNotSupportedException.class,
293                     () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis), "Expected a StreamingNotSupportedException");
294             assertEquals(ArchiveStreamFactory.SEVEN_Z, ex.getFormat());
295         }
296     }
297 
298     @Test
299     public void testEncodingCtor() {
300         ArchiveStreamFactory fac = new ArchiveStreamFactory();
301         assertNull(fac.getEntryEncoding());
302         fac = new ArchiveStreamFactory(null);
303         assertNull(fac.getEntryEncoding());
304         fac = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
305         assertEquals(StandardCharsets.UTF_8.name(), fac.getEntryEncoding());
306     }
307 
308     @Test
309     @SuppressWarnings("deprecation")
310     public void testEncodingDeprecated() {
311         final ArchiveStreamFactory fac1 = new ArchiveStreamFactory();
312         assertNull(fac1.getEntryEncoding());
313         fac1.setEntryEncoding(StandardCharsets.UTF_8.name());
314         assertEquals(StandardCharsets.UTF_8.name(), fac1.getEntryEncoding());
315         fac1.setEntryEncoding(StandardCharsets.US_ASCII.name());
316         assertEquals(StandardCharsets.US_ASCII.name(), fac1.getEntryEncoding());
317         final ArchiveStreamFactory fac2 = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
318         assertEquals(StandardCharsets.UTF_8.name(), fac2.getEntryEncoding());
319         fac2.setEntryEncoding(StandardCharsets.US_ASCII.name());
320         assertEquals(StandardCharsets.US_ASCII.name(), fac2.getEntryEncoding());
321     }
322 
323     @Test
324     public void testEncodingInputStream() throws Exception {
325         int failed = 0;
326         for (int i = 1; i <= TESTS.length; i++) {
327             final TestData test = TESTS[i - 1];
328             try (ArchiveInputStream<?> ais = getInputStream(test.type, test.testFile, test.fac)) {
329                 final String field = getCharsetName(ais);
330                 if (!eq(test.expectedEncoding, field)) {
331                     System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
332                     failed++;
333                 }
334             }
335         }
336         if (failed > 0) {
337             fail("Tests failed: " + failed + " out of " + TESTS.length);
338         }
339     }
340 
341     @Test
342     public void testEncodingInputStreamAutodetect() throws Exception {
343         int failed = 0;
344         for (int i = 1; i <= TESTS.length; i++) {
345             final TestData test = TESTS[i - 1];
346             try (ArchiveInputStream<?> ais = getInputStream(test.testFile, test.fac)) {
347                 final String field = getCharsetName(ais);
348                 if (!eq(test.expectedEncoding, field)) {
349                     System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
350                     failed++;
351                 }
352             }
353         }
354         if (failed > 0) {
355             fail("Tests failed: " + failed + " out of " + TESTS.length);
356         }
357     }
358 
359     @Test
360     public void testEncodingOutputStream() throws Exception {
361         int failed = 0;
362         for (int i = 1; i <= TESTS.length; i++) {
363             final TestData test = TESTS[i - 1];
364             if (test.hasOutputStream) {
365                 try (ArchiveOutputStream<?> ais = getOutputStream(test.type, test.fac)) {
366                     final String field = getFieldAsString(ais, test.fieldName);
367                     if (!eq(test.expectedEncoding, field)) {
368                         System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
369                         failed++;
370                     }
371                 }
372             }
373         }
374         if (failed > 0) {
375             fail("Tests failed: " + failed + " out of " + TESTS.length);
376         }
377     }
378 
379     @ParameterizedTest
380     @MethodSource("getIcoPathStream")
381     public void testIcoFilesAreNoTARs(final Path path) throws Exception {
382         try (InputStream fis = Files.newInputStream(path);
383                 InputStream is = new BufferedInputStream(fis)) {
384             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.detect(is),
385                     "created an input stream for a non-archive");
386             assertTrue(ae.getMessage().startsWith("No Archiver found"));
387         }
388         try (InputStream fis = Files.newInputStream(path);
389                 InputStream is = new BufferedInputStream(fis)) {
390             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(is),
391                     "created an input stream for a non-archive");
392             assertTrue(ae.getMessage().startsWith("No Archiver found"));
393         }
394     }
395 
396     /**
397      * See https://issues.apache.org/jira/browse/COMPRESS-171
398      */
399     @Test
400     public void testShortTextFilesAreNoTARs() {
401         final ArchiveException ae = assertThrows(ArchiveException.class,
402                 () -> ArchiveStreamFactory.DEFAULT
403                         .createArchiveInputStream(new ByteArrayInputStream("This certainly is not a tar archive, really, no kidding".getBytes())),
404                 "created an input stream for a non-archive");
405         assertTrue(ae.getMessage().startsWith("No Archiver found"));
406     }
407 
408     /**
409      * Tests case for <a href="https://issues.apache.org/jira/browse/COMPRESS-208" >COMPRESS-208</a>.
410      */
411     @Test
412     public void testSkipsPK00Prefix() throws Exception {
413         try (InputStream fis = newInputStream("COMPRESS-208.zip")) {
414             try (InputStream bis = new BufferedInputStream(fis)) {
415                 try (ArchiveInputStream<?> ais = ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis)) {
416                     assertTrue(ais instanceof ZipArchiveInputStream);
417                 }
418             }
419         }
420     }
421 }