View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.archivers;
20  
21  import static org.junit.jupiter.api.Assertions.assertEquals;
22  import static org.junit.jupiter.api.Assertions.assertInstanceOf;
23  import static org.junit.jupiter.api.Assertions.assertNull;
24  import static org.junit.jupiter.api.Assertions.assertThrows;
25  import static org.junit.jupiter.api.Assertions.assertTrue;
26  import static org.junit.jupiter.api.Assertions.fail;
27  
28  import java.io.BufferedInputStream;
29  import java.io.ByteArrayInputStream;
30  import java.io.ByteArrayOutputStream;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.nio.charset.Charset;
34  import java.nio.charset.StandardCharsets;
35  import java.nio.file.Files;
36  import java.nio.file.Path;
37  import java.nio.file.Paths;
38  import java.util.Objects;
39  import java.util.stream.Stream;
40  
41  import org.apache.commons.compress.AbstractTest;
42  import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
43  import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
44  import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
45  import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
46  import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
47  import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
48  import org.apache.commons.compress.utils.ByteUtils;
49  import org.apache.commons.io.input.BrokenInputStream;
50  import org.apache.commons.lang3.reflect.FieldUtils;
51  import org.junit.jupiter.api.Test;
52  import org.junit.jupiter.params.ParameterizedTest;
53  import org.junit.jupiter.params.provider.MethodSource;
54  
55  class ArchiveStreamFactoryTest extends AbstractTest {
56  
57      static class TestData {
58          final String testFile;
59          final String expectedEncoding;
60          final ArchiveStreamFactory fac;
61          final String fieldName;
62          final String type;
63          final boolean hasOutputStream;
64  
65          TestData(final String testFile, final String type, final boolean hasOut, final String expectedEncoding, final ArchiveStreamFactory fac,
66                  final String fieldName) {
67              this.testFile = testFile;
68              this.expectedEncoding = expectedEncoding;
69              this.fac = fac;
70              this.fieldName = fieldName;
71              this.type = type;
72              this.hasOutputStream = hasOut;
73          }
74  
75          @Override
76          public String toString() {
77              return "TestData [testFile=" + testFile + ", expectedEncoding=" + expectedEncoding + ", fac=" + fac + ", fieldName=" + fieldName + ", type=" + type
78                      + ", hasOutputStream=" + hasOutputStream + "]";
79          }
80      }
81  
82      private static final String UNKNOWN = "??";
83  
84      private static final ArchiveStreamFactory FACTORY = ArchiveStreamFactory.DEFAULT;
85  
86      private static final ArchiveStreamFactory FACTORY_UTF8 = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
87  
88      private static final ArchiveStreamFactory FACTORY_ASCII = new ArchiveStreamFactory(StandardCharsets.US_ASCII.name());
89  
90      private static final ArchiveStreamFactory FACTORY_SET_UTF8 = getFactory(StandardCharsets.UTF_8.name());
91  
92      private static final ArchiveStreamFactory FACTORY_SET_ASCII = getFactory(StandardCharsets.US_ASCII.name());
93  
94      /**
95       * Default encoding if none is provided (not even null). The test currently assumes that the output default is the same as the input default.
96       */
97      private static final String ARJ_DEFAULT;
98      private static final String DUMP_DEFAULT;
99      private static final String ZIP_DEFAULT = getCharsetName(new ZipArchiveInputStream(null));
100     private static final String CPIO_DEFAULT = getCharsetName(new CpioArchiveInputStream(null));
101     private static final String TAR_DEFAULT = getCharsetName(new TarArchiveInputStream(null));
102     private static final String JAR_DEFAULT = getCharsetName(new JarArchiveInputStream(null));
103 
104     static {
105         String dflt;
106         dflt = UNKNOWN;
107         try (ArjArchiveInputStream inputStream = new ArjArchiveInputStream(newInputStream("bla.arj"))) {
108             dflt = getCharsetName(inputStream);
109         } catch (final Exception e) {
110             e.printStackTrace();
111         }
112         ARJ_DEFAULT = dflt;
113         dflt = UNKNOWN;
114         try (DumpArchiveInputStream inputStream = new DumpArchiveInputStream(newInputStream("bla.dump"))) {
115             dflt = getCharsetName(inputStream);
116         } catch (final Exception e) {
117             e.printStackTrace();
118         }
119         DUMP_DEFAULT = dflt;
120     }
121 
122     static final TestData[] TESTS = { new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, ARJ_DEFAULT, FACTORY, "charsetName"),
123             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
124             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
125             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
126             new TestData("bla.arj", ArchiveStreamFactory.ARJ, false, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
127 
128             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, CPIO_DEFAULT, FACTORY, "charsetName"),
129             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
130             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
131             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
132             new TestData("bla.cpio", ArchiveStreamFactory.CPIO, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
133 
134             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, DUMP_DEFAULT, FACTORY, "charsetName"),
135             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
136             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
137             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
138             new TestData("bla.dump", ArchiveStreamFactory.DUMP, false, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
139 
140             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, TAR_DEFAULT, FACTORY, "charsetName"),
141             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charsetName"),
142             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charsetName"),
143             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charsetName"),
144             new TestData("bla.tar", ArchiveStreamFactory.TAR, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charsetName"),
145 
146             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, JAR_DEFAULT, FACTORY, "charset"),
147             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charset"),
148             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charset"),
149             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charset"),
150             new TestData("bla.jar", ArchiveStreamFactory.JAR, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charset"),
151 
152             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, ZIP_DEFAULT, FACTORY, "charset"),
153             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.UTF_8.name(), FACTORY_UTF8, "charset"),
154             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.US_ASCII.name(), FACTORY_ASCII, "charset"),
155             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.UTF_8.name(), FACTORY_SET_UTF8, "charset"),
156             new TestData("bla.zip", ArchiveStreamFactory.ZIP, true, StandardCharsets.US_ASCII.name(), FACTORY_SET_ASCII, "charset"), };
157 
158     private static String getCharsetName(final ArchiveInputStream<?> inputStream) {
159         return inputStream.getCharset().name();
160     }
161 
162     @SuppressWarnings("deprecation") // test of deprecated method
163     static ArchiveStreamFactory getFactory(final String entryEncoding) {
164         final ArchiveStreamFactory fac = new ArchiveStreamFactory();
165         fac.setEntryEncoding(entryEncoding);
166         return fac;
167     }
168 
169     private static String getFieldAsString(final Object instance, final String name) {
170         if (instance instanceof ArchiveInputStream) {
171             return getCharsetName((ArchiveInputStream<?>) instance);
172         }
173         try {
174             final Object object = FieldUtils.readField(instance, name, true);
175             if (object == null) {
176                 return null;
177             }
178             if (object instanceof String) {
179                 // For example "charsetName"
180                 return (String) object;
181             }
182             if (object instanceof Charset) {
183                 // For example "charset"
184                 return ((Charset) object).name();
185             }
186             // System.out.println("Wrong type: " + object.getClass().getCanonicalName() + " for " + name + " in class " + instance.getClass().getSimpleName());
187             return object.toString();
188         } catch (final IllegalAccessException e) {
189             System.out.println("Cannot find " + name + " in class " + instance.getClass().getSimpleName());
190             return UNKNOWN;
191         }
192     }
193 
194     @SuppressWarnings("resource") // Caller closes
195     public static Stream<Path> getIcoPathStream() throws IOException {
196         return Files.walk(Paths.get("src/test/resources/org/apache/commons/compress/ico")).filter(Files::isRegularFile);
197     }
198 
199     private String detect(final String resource) throws IOException, ArchiveException {
200         try (InputStream in = new BufferedInputStream(newInputStream(resource))) {
201             return ArchiveStreamFactory.detect(in);
202         }
203     }
204 
205     @SuppressWarnings("resource")
206     private <T extends ArchiveInputStream<? extends E>, E extends ArchiveEntry> T getInputStream(final String resource, final ArchiveStreamFactory factory)
207             throws IOException, ArchiveException {
208         return factory.createArchiveInputStream(new BufferedInputStream(newInputStream(resource)));
209     }
210 
211     @SuppressWarnings("resource")
212     private <T extends ArchiveInputStream<? extends E>, E extends ArchiveEntry> T getInputStream(final String type, final String resource,
213             final ArchiveStreamFactory factory) throws IOException, ArchiveException {
214         return factory.createArchiveInputStream(type, new BufferedInputStream(newInputStream(resource)));
215     }
216 
217     private <T extends ArchiveOutputStream<? extends E>, E extends ArchiveEntry> T getOutputStream(final String type, final ArchiveStreamFactory factory)
218             throws ArchiveException {
219         return factory.createArchiveOutputStream(type, new ByteArrayOutputStream());
220     }
221 
222     /**
223      * see https://issues.apache.org/jira/browse/COMPRESS-191
224      */
225     @Test
226     void testAiffFilesAreNoTARs() throws Exception {
227         try (InputStream fis = newInputStream("testAIFF.aif");
228                 InputStream is = new BufferedInputStream(fis)) {
229             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(is),
230                     "created an input stream for a non-archive");
231             assertTrue(ae.getMessage().startsWith("No Archiver found"));
232         }
233     }
234 
235     @Test
236     void testCantRead7zFromStream() throws Exception {
237         assertThrows(StreamingNotSupportedException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(ArchiveStreamFactory.SEVEN_Z,
238                 new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY)));
239     }
240 
241     @Test
242     void testCantWrite7zToStream() throws Exception {
243         assertThrows(StreamingNotSupportedException.class,
244                 () -> ArchiveStreamFactory.DEFAULT.createArchiveOutputStream(ArchiveStreamFactory.SEVEN_Z, new ByteArrayOutputStream()));
245     }
246 
247     @Test
248     void testCOMPRESS209() throws Exception {
249         try (InputStream fis = newInputStream("testCompress209.doc");
250                 InputStream bis = new BufferedInputStream(fis)) {
251             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis),
252                     "created an input stream for a non-archive");
253             assertTrue(ae.getMessage().startsWith("No Archiver found"));
254         }
255     }
256 
257     @Test
258     void testDetect() throws Exception {
259         for (final String extension : new String[] { ArchiveStreamFactory.AR, ArchiveStreamFactory.ARJ, ArchiveStreamFactory.CPIO, ArchiveStreamFactory.DUMP,
260                 // Compress doesn't know how to detect JARs, see COMPRESS-91
261                 // ArchiveStreamFactory.JAR,
262                 ArchiveStreamFactory.SEVEN_Z, ArchiveStreamFactory.TAR, ArchiveStreamFactory.ZIP }) {
263             assertEquals(extension, detect("bla." + extension));
264         }
265 
266         final ArchiveException e1 = assertThrows(ArchiveException.class,
267                 () -> ArchiveStreamFactory.detect(new BufferedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY))),
268                 "shouldn't be able to detect empty stream");
269         assertEquals("No Archiver found for the stream signature", e1.getMessage());
270 
271         final IllegalArgumentException e2 = assertThrows(IllegalArgumentException.class, () -> ArchiveStreamFactory.detect(null),
272                 "shouldn't be able to detect null stream");
273         assertEquals("Stream must not be null.", e2.getMessage());
274 
275         final ArchiveException e3 = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.detect(new BufferedInputStream(new BrokenInputStream())),
276                 "Expected ArchiveException");
277         assertEquals("Failure reading signature.", e3.getMessage());
278     }
279 
280     /**
281      * Test case for <a href="https://issues.apache.org/jira/browse/COMPRESS-267">COMPRESS-267</a>.
282      */
283     @Test
284     void testDetectsAndThrowsFor7z() throws Exception {
285         try (InputStream fis = newInputStream("bla.7z");
286                 InputStream bis = new BufferedInputStream(fis)) {
287             final StreamingNotSupportedException ex = assertThrows(StreamingNotSupportedException.class,
288                     () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis), "Expected a StreamingNotSupportedException");
289             assertEquals(ArchiveStreamFactory.SEVEN_Z, ex.getFormat());
290         }
291     }
292 
293     @Test
294     void testEncodingCtor() {
295         ArchiveStreamFactory fac = new ArchiveStreamFactory();
296         assertNull(fac.getEntryEncoding());
297         fac = new ArchiveStreamFactory(null);
298         assertNull(fac.getEntryEncoding());
299         fac = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
300         assertEquals(StandardCharsets.UTF_8.name(), fac.getEntryEncoding());
301     }
302 
303     @Test
304     @SuppressWarnings("deprecation")
305     void testEncodingDeprecated() {
306         final ArchiveStreamFactory fac1 = new ArchiveStreamFactory();
307         assertNull(fac1.getEntryEncoding());
308         fac1.setEntryEncoding(StandardCharsets.UTF_8.name());
309         assertEquals(StandardCharsets.UTF_8.name(), fac1.getEntryEncoding());
310         fac1.setEntryEncoding(StandardCharsets.US_ASCII.name());
311         assertEquals(StandardCharsets.US_ASCII.name(), fac1.getEntryEncoding());
312         final ArchiveStreamFactory fac2 = new ArchiveStreamFactory(StandardCharsets.UTF_8.name());
313         assertEquals(StandardCharsets.UTF_8.name(), fac2.getEntryEncoding());
314         fac2.setEntryEncoding(StandardCharsets.US_ASCII.name());
315         assertEquals(StandardCharsets.US_ASCII.name(), fac2.getEntryEncoding());
316     }
317 
318     @Test
319     void testEncodingInputStream() throws Exception {
320         int failed = 0;
321         for (int i = 1; i <= TESTS.length; i++) {
322             final TestData test = TESTS[i - 1];
323             try (ArchiveInputStream<?> ais = getInputStream(test.type, test.testFile, test.fac)) {
324                 final String field = getCharsetName(ais);
325                 if (!Objects.equals(field, field)) {
326                     System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
327                     failed++;
328                 }
329             }
330         }
331         if (failed > 0) {
332             fail("Tests failed: " + failed + " out of " + TESTS.length);
333         }
334     }
335 
336     @Test
337     void testEncodingInputStreamAutodetect() throws Exception {
338         int failed = 0;
339         for (int i = 1; i <= TESTS.length; i++) {
340             final TestData test = TESTS[i - 1];
341             try (ArchiveInputStream<?> ais = getInputStream(test.testFile, test.fac)) {
342                 final String field = getCharsetName(ais);
343                 if (!Objects.equals(field, field)) {
344                     System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
345                     failed++;
346                 }
347             }
348         }
349         if (failed > 0) {
350             fail("Tests failed: " + failed + " out of " + TESTS.length);
351         }
352     }
353 
354     @Test
355     void testEncodingOutputStream() throws Exception {
356         int failed = 0;
357         for (int i = 1; i <= TESTS.length; i++) {
358             final TestData test = TESTS[i - 1];
359             if (test.hasOutputStream) {
360                 try (ArchiveOutputStream<?> ais = getOutputStream(test.type, test.fac)) {
361                     final String field = getFieldAsString(ais, test.fieldName);
362                     if (!Objects.equals(field, field)) {
363                         System.err.println("Failed test " + i + ". expected: " + test.expectedEncoding + " actual: " + field + " type: " + test.type);
364                         failed++;
365                     }
366                 }
367             }
368         }
369         if (failed > 0) {
370             fail("Tests failed: " + failed + " out of " + TESTS.length);
371         }
372     }
373 
374     @ParameterizedTest
375     @MethodSource("getIcoPathStream")
376     void testIcoFilesAreNoTARs(final Path path) throws Exception {
377         try (InputStream fis = Files.newInputStream(path);
378                 InputStream is = new BufferedInputStream(fis)) {
379             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.detect(is),
380                     "created an input stream for a non-archive");
381             assertTrue(ae.getMessage().startsWith("No Archiver found"));
382         }
383         try (InputStream fis = Files.newInputStream(path);
384                 InputStream is = new BufferedInputStream(fis)) {
385             final ArchiveException ae = assertThrows(ArchiveException.class, () -> ArchiveStreamFactory.DEFAULT.createArchiveInputStream(is),
386                     "created an input stream for a non-archive");
387             assertTrue(ae.getMessage().startsWith("No Archiver found"));
388         }
389     }
390 
391     /**
392      * See https://issues.apache.org/jira/browse/COMPRESS-171
393      */
394     @Test
395     void testShortTextFilesAreNoTARs() {
396         final ArchiveException ae = assertThrows(ArchiveException.class,
397                 () -> ArchiveStreamFactory.DEFAULT
398                         .createArchiveInputStream(new ByteArrayInputStream("This certainly is not a tar archive, really, no kidding".getBytes())),
399                 "created an input stream for a non-archive");
400         assertTrue(ae.getMessage().startsWith("No Archiver found"));
401     }
402 
403     /**
404      * Tests case for <a href="https://issues.apache.org/jira/browse/COMPRESS-208">COMPRESS-208</a>.
405      */
406     @Test
407     void testSkipsPK00Prefix() throws Exception {
408         try (InputStream fis = newInputStream("COMPRESS-208.zip")) {
409             try (InputStream bis = new BufferedInputStream(fis)) {
410                 try (ArchiveInputStream<?> ais = ArchiveStreamFactory.DEFAULT.createArchiveInputStream(bis)) {
411                     assertInstanceOf(ZipArchiveInputStream.class, ais);
412                 }
413             }
414         }
415     }
416 
417     @Test
418     void testTarContainingDirWith1TxtFileIsTAR() throws IOException, ArchiveException {
419         assertEquals(ArchiveStreamFactory.TAR, detect("dirWith1TxtFile.tar"));
420     }
421 
422     @Test
423     void testTarContainingEmptyDirIsTAR() throws IOException, ArchiveException {
424         assertEquals(ArchiveStreamFactory.TAR, detect("emptyDir.tar"));
425     }
426 
427     /**
428      * Test case for <a href="https://issues.apache.org/jira/browse/COMPRESS-674">COMPRESS-674</a>.
429      */
430     @Test
431     void testUtf16TextIsNotTAR() {
432         final ArchiveException archiveException = assertThrows(ArchiveException.class,
433                 () -> detect("utf16-text.txt"));
434         assertEquals("No Archiver found for the stream signature", archiveException.getMessage());
435     }
436 }