/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.compress.archivers.zip;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.zip.CRC32;

import org.apache.commons.compress.AbstractTest;
import org.junit.jupiter.api.Test;

40  class UTF8ZipFilesTest extends AbstractTest {
41  
42      private static final String CP437 = "cp437";
43      private static final String ASCII_TXT = "ascii.txt";
44      private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
45      private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
46  
47      private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) {
48          final byte[] b = ze.getRawName();
49          assertNotNull(b);
50          final int len = ASCII_TXT.length();
51          assertEquals(len, b.length);
52          for (int i = 0; i < len; i++) {
53              assertEquals((byte) ASCII_TXT.charAt(i), b[i], "Byte " + i);
54          }
55          assertNotSame(b, ze.getRawName());
56      }
57  
58      private static void assertUnicodeName(final ZipArchiveEntry ze, final String expectedName, final String encoding) throws IOException {
59          if (!expectedName.equals(ze.getName())) {
60              final UnicodePathExtraField ucpf = findUniCodePath(ze);
61              assertNotNull(ucpf);
62  
63              final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
64              final ByteBuffer ne = enc.encode(ze.getName());
65  
66              final CRC32 crc = new CRC32();
67              crc.update(ne.array(), ne.arrayOffset(), ne.limit() - ne.position());
68  
69              assertEquals(crc.getValue(), ucpf.getNameCRC32());
70              assertEquals(expectedName, new String(ucpf.getUnicodeName(), UTF_8));
71          }
72      }
73  
74      private static void createTestFile(final File file, final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra)
75              throws IOException {
76  
77          final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
78  
79          try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file)) {
80              zos.setEncoding(encoding);
81              zos.setUseLanguageEncodingFlag(withEFS);
82              zos.setCreateUnicodeExtraFields(
83                      withExplicitUnicodeExtra ? ZipArchiveOutputStream.UnicodeExtraFieldPolicy.NEVER : ZipArchiveOutputStream.UnicodeExtraFieldPolicy.ALWAYS);
84  
85              ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
86              if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
87  
88                  final ByteBuffer en = zipEncoding.encode(ze.getName());
89  
90                  ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
91              }
92  
93              zos.putArchiveEntry(ze);
94              zos.writeUsAscii("Hello, world!");
95              zos.closeArchiveEntry();
96  
97              ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
98              if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
99  
100                 final ByteBuffer en = zipEncoding.encode(ze.getName());
101 
102                 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
103             }
104 
105             zos.putArchiveEntry(ze);
106             zos.writeUsAscii("Give me your money!");
107             zos.closeArchiveEntry();
108 
109             ze = new ZipArchiveEntry(ASCII_TXT);
110 
111             if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
112 
113                 final ByteBuffer en = zipEncoding.encode(ze.getName());
114 
115                 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
116             }
117 
118             zos.putArchiveEntry(ze);
119             zos.writeUsAscii("ascii");
120             zos.closeArchiveEntry();
121 
122             zos.finish();
123         }
124     }
125 
126     private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) {
127         return (UnicodePathExtraField) ze.getExtraField(UnicodePathExtraField.UPATH_ID);
128     }
129 
130     private static void testFile(final File file, final String encoding) throws IOException {
131         try (ZipFile zipFile = ZipFile.builder().setFile(file).setCharset(encoding).setUseUnicodeExtraFields(false).get()) {
132             zipFile.stream().forEach(ze -> {
133                 if (ze.getName().endsWith("sser.txt")) {
134                     assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
135                 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
136                     assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
137                 } else if (!ze.getName().equals(ASCII_TXT)) {
138                     fail("Unrecognized ZIP entry with name [" + ze.getName() + "] found.");
139                 }
140             });
141         }
142     }
143 
144     private void assertCanRead(final ZipFile zf, final String fileName) throws IOException {
145         final ZipArchiveEntry entry = zf.getEntry(fileName);
146         assertNotNull(entry, "Entry doesn't exist");
147         try (InputStream is = zf.getInputStream(entry)) {
148             assertNotNull(is, "InputStream is null");
149             is.read();
150         }
151     }
152 
153     @Test
154     void testASCIIFileRoundtripExplicitUnicodeExtra() throws IOException {
155         testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, true);
156     }
157 
158     @Test
159     void testASCIIFileRoundtripImplicitUnicodeExtra() throws IOException {
160         testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, false);
161     }
162 
163     @Test
164     void testCP437FileRoundtripExplicitUnicodeExtra() throws IOException {
165         testFileRoundtrip(CP437, false, true);
166     }
167 
168     @Test
169     void testCP437FileRoundtripImplicitUnicodeExtra() throws IOException {
170         testFileRoundtrip(CP437, false, false);
171     }
172 
173     private void testFileRoundtrip(final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra) throws IOException {
174         final File file = createTempFile(encoding + "-test", ".zip");
175         createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
176         testFile(file, encoding);
177     }
178 
179     @Test
180     void testRawNameReadFromStream() throws IOException {
181         try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
182             assertRawNameOfAcsiiTxt(zi.getNextEntry());
183         }
184     }
185 
186     @Test
187     void testRawNameReadFromZipFile() throws IOException {
188         final File archive = getFile("utf8-7zip-test.zip");
189         try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(CP437).setUseUnicodeExtraFields(false).get()) {
190             assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
191         }
192     }
193 
194     /*
195      * 7-ZIP created archive, uses EFS to signal UTF-8 file names.
196      *
197      * 7-ZIP doesn't use EFS for strings that can be encoded in CP437 - which is true for OIL_BARREL_TXT.
198      */
199     @Test
200     void testRead7ZipArchive() throws IOException {
201         final File archive = getFile("utf8-7zip-test.zip");
202         try (ZipFile zf = new ZipFile(archive, CP437, false)) {
203             assertNotNull(zf.getEntry(ASCII_TXT));
204             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
205             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
206         }
207     }
208 
209     @Test
210     void testRead7ZipArchiveForStream() throws IOException {
211         try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
212             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
213             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
214             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
215         }
216     }
217 
218     /*
219      * WinZIP created archive, uses Unicode Extra Fields but only in the central directory.
220      */
221     @Test
222     void testReadWinZipArchive() throws IOException {
223         final File archive = getFile("utf8-winzip-test.zip");
224         // fix for test fails on Windows with default charset that is not UTF-8
225         String encoding = null;
226         if (Charset.defaultCharset() != UTF_8) {
227             encoding = UTF_8.name();
228         }
229         try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(encoding).setUseUnicodeExtraFields(true).get()) {
230             assertCanRead(zf, ASCII_TXT);
231             assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
232             assertCanRead(zf, OIL_BARREL_TXT);
233         }
234     }
235 
236     @Test
237     void testReadWinZipArchiveForStream() throws IOException {
238         // fix for test fails on Windows with default charset that is not UTF-8
239         String encoding = null;
240         if (Charset.defaultCharset() != UTF_8) {
241             encoding = UTF_8.name();
242         }
243         try (InputStream archive = newInputStream("utf8-winzip-test.zip");
244                 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive, encoding, true)) {
245             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
246             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
247             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
248         }
249     }
250 
251     /**
252      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-479">COMPRESS-479</a>
253      */
254     @Test
255     void testStreamSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
256         try (InputStream archive = newInputStream("COMPRESS-479.zip");
257                 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive)) {
258             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
259             assertEquals("%U20AC_for_Dollar.txt", zi.getNextEntry().getName());
260             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
261         }
262     }
263 
264     @Test
265     void testUtf8FileRoundtripExplicitUnicodeExtra() throws IOException {
266         testFileRoundtrip(StandardCharsets.UTF_8.name(), true, true);
267     }
268 
269     @Test
270     void testUtf8FileRoundtripImplicitUnicodeExtra() throws IOException {
271         testFileRoundtrip(StandardCharsets.UTF_8.name(), true, false);
272     }
273 
274     @Test
275     void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra() throws IOException {
276         testFileRoundtrip(StandardCharsets.UTF_8.name(), false, true);
277     }
278 
279     @Test
280     void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra() throws IOException {
281         testFileRoundtrip(StandardCharsets.UTF_8.name(), false, false);
282     }
283 
284     @Test
285     void testUtf8Interoperability() throws IOException {
286         final File file1 = getFile("utf8-7zip-test.zip");
287         final File file2 = getFile("utf8-winzip-test.zip");
288         testFile(file1, CP437);
289         testFile(file2, CP437);
290     }
291 
292     @Test
293     void testZipArchiveInputStreamReadsUnicodeFields() throws IOException {
294         final File file = createTempFile("unicode-test", ".zip");
295         createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
296         try (ZipFile zf = ZipFile.builder().setFile(file).setCharset(StandardCharsets.US_ASCII).setUseUnicodeExtraFields(true).get()) {
297             assertNotNull(zf.getEntry(ASCII_TXT));
298             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
299             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
300         }
301     }
302 
303     @Test
304     void testZipFileReadsUnicodeFields() throws IOException {
305         final File file = createTempFile("unicode-test", ".zip");
306         createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
307         try (ZipArchiveInputStream zi = new ZipArchiveInputStream(Files.newInputStream(file.toPath()), StandardCharsets.US_ASCII.name(), true)) {
308             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
309             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
310             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
311         }
312     }
313 
314     /**
315      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-479">COMPRESS-479</a>
316      */
317     @Test
318     void testZipFileSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
319         try (ZipFile zf = ZipFile.builder().setFile(getFile("COMPRESS-479.zip")).get()) {
320             assertNotNull(zf.getEntry(ASCII_TXT));
321             assertNotNull(zf.getEntry("%U20AC_for_Dollar.txt"));
322             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
323         }
324     }
325 }