View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   *
17   */
18  
19  package org.apache.commons.compress.archivers.zip;
20  
21  import static org.junit.Assert.*;
22  
23  import java.io.File;
24  import java.io.FileInputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.UnsupportedEncodingException;
28  import java.nio.ByteBuffer;
29  import java.util.Enumeration;
30  import java.util.zip.CRC32;
31  
32  import org.apache.commons.compress.AbstractTestCase;
33  import org.apache.commons.compress.utils.CharsetNames;
34  import org.junit.Test;
35  
36  public class UTF8ZipFilesTest extends AbstractTestCase {
37  
38      private static final String CP437 = "cp437";
39      private static final String ASCII_TXT = "ascii.txt";
40      private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
41      private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
42  
43      @Test
44      public void testUtf8FileRoundtripExplicitUnicodeExtra()
45          throws IOException {
46          testFileRoundtrip(CharsetNames.UTF_8, true, true);
47      }
48  
49      @Test
50      public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
51          throws IOException {
52          testFileRoundtrip(CharsetNames.UTF_8, false, true);
53      }
54  
55      @Test
56      public void testCP437FileRoundtripExplicitUnicodeExtra()
57          throws IOException {
58          testFileRoundtrip(CP437, false, true);
59      }
60  
61      @Test
62      public void testASCIIFileRoundtripExplicitUnicodeExtra()
63          throws IOException {
64          testFileRoundtrip(CharsetNames.US_ASCII, false, true);
65      }
66  
67      @Test
68      public void testUtf8FileRoundtripImplicitUnicodeExtra()
69          throws IOException {
70          testFileRoundtrip(CharsetNames.UTF_8, true, false);
71      }
72  
73      @Test
74      public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
75          throws IOException {
76          testFileRoundtrip(CharsetNames.UTF_8, false, false);
77      }
78  
79      @Test
80      public void testCP437FileRoundtripImplicitUnicodeExtra()
81          throws IOException {
82          testFileRoundtrip(CP437, false, false);
83      }
84  
85      @Test
86      public void testASCIIFileRoundtripImplicitUnicodeExtra()
87          throws IOException {
88          testFileRoundtrip(CharsetNames.US_ASCII, false, false);
89      }
90  
91      /*
92       * 7-ZIP created archive, uses EFS to signal UTF-8 filenames.
93       *
94       * 7-ZIP doesn't use EFS for strings that can be encoded in CP437
95       * - which is true for OIL_BARREL_TXT.
96       */
97      @Test
98      public void testRead7ZipArchive() throws IOException {
99          File archive = getFile("utf8-7zip-test.zip");
100         ZipFile zf = null;
101         try {
102             zf = new ZipFile(archive, CP437, false);
103             assertNotNull(zf.getEntry(ASCII_TXT));
104             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
105             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
106         } finally {
107             ZipFile.closeQuietly(zf);
108         }
109     }
110 
111     @Test
112     public void testRead7ZipArchiveForStream() throws IOException {
113         FileInputStream archive =
114             new FileInputStream(getFile("utf8-7zip-test.zip"));
115         ZipArchiveInputStream zi = null;
116         try {
117             zi = new ZipArchiveInputStream(archive, CP437, false);
118             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
119             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
120             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
121         } finally {
122             if (zi != null) {
123                 zi.close();
124             }
125         }
126     }
127 
128     /*
129      * WinZIP created archive, uses Unicode Extra Fields but only in
130      * the central directory.
131      */
132     @Test
133     public void testReadWinZipArchive() throws IOException {
134         File archive = getFile("utf8-winzip-test.zip");
135         ZipFile zf = null;
136         try {
137             zf = new ZipFile(archive, null, true);
138             assertCanRead(zf, ASCII_TXT);
139             assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
140             assertCanRead(zf, OIL_BARREL_TXT);
141         } finally {
142             ZipFile.closeQuietly(zf);
143         }
144     }
145 
146     private void assertCanRead(ZipFile zf, String fileName) throws IOException {
147         ZipArchiveEntry entry = zf.getEntry(fileName);
148         assertNotNull("Entry doesn't exist", entry);
149         InputStream is = zf.getInputStream(entry);
150         assertNotNull("InputStream is null", is);
151         try {
152             is.read();
153         } finally {
154             is.close();
155         }
156     }
157 
158     @Test
159     public void testReadWinZipArchiveForStream() throws IOException {
160         FileInputStream archive =
161             new FileInputStream(getFile("utf8-winzip-test.zip"));
162         ZipArchiveInputStream zi = null;
163         try {
164             zi = new ZipArchiveInputStream(archive, null, true);
165             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
166             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
167             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
168         } finally {
169             if (zi != null) {
170                 zi.close();
171             }
172         }
173     }
174 
175     @Test
176     public void testZipFileReadsUnicodeFields() throws IOException {
177         File file = File.createTempFile("unicode-test", ".zip");
178         file.deleteOnExit();
179         ZipArchiveInputStream zi = null;
180         try {
181             createTestFile(file, CharsetNames.US_ASCII, false, true);
182             FileInputStream archive = new FileInputStream(file);
183             zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true);
184             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
185             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
186             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
187         } finally {
188             if (zi != null) {
189                 zi.close();
190             }
191             tryHardToDelete(file);
192         }
193     }
194 
195     @Test
196     public void testZipArchiveInputStreamReadsUnicodeFields()
197         throws IOException {
198         File file = File.createTempFile("unicode-test", ".zip");
199         file.deleteOnExit();
200         ZipFile zf = null;
201         try {
202             createTestFile(file, CharsetNames.US_ASCII, false, true);
203             zf = new ZipFile(file, CharsetNames.US_ASCII, true);
204             assertNotNull(zf.getEntry(ASCII_TXT));
205             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
206             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
207         } finally {
208             ZipFile.closeQuietly(zf);
209             tryHardToDelete(file);
210         }
211     }
212 
213     @Test
214     public void testRawNameReadFromZipFile()
215         throws IOException {
216         File archive = getFile("utf8-7zip-test.zip");
217         ZipFile zf = null;
218         try {
219             zf = new ZipFile(archive, CP437, false);
220             assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
221         } finally {
222             ZipFile.closeQuietly(zf);
223         }
224     }
225 
226     @Test
227     public void testRawNameReadFromStream()
228         throws IOException {
229         FileInputStream archive =
230             new FileInputStream(getFile("utf8-7zip-test.zip"));
231         ZipArchiveInputStream zi = null;
232         try {
233             zi = new ZipArchiveInputStream(archive, CP437, false);
234             assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry());
235         } finally {
236             if (zi != null) {
237                 zi.close();
238             }
239         }
240     }
241 
242     private static void testFileRoundtrip(String encoding, boolean withEFS,
243                                           boolean withExplicitUnicodeExtra)
244         throws IOException {
245 
246         File file = File.createTempFile(encoding + "-test", ".zip");
247         file.deleteOnExit();
248         try {
249             createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
250             testFile(file, encoding);
251         } finally {
252             tryHardToDelete(file);
253         }
254     }
255 
256     private static void createTestFile(File file, String encoding,
257                                        boolean withEFS,
258                                        boolean withExplicitUnicodeExtra)
259         throws UnsupportedEncodingException, IOException {
260 
261         ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
262 
263         ZipArchiveOutputStream zos = null;
264         try {
265             zos = new ZipArchiveOutputStream(file);
266             zos.setEncoding(encoding);
267             zos.setUseLanguageEncodingFlag(withEFS);
268             zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? 
269                                             ZipArchiveOutputStream
270                                             .UnicodeExtraFieldPolicy.NEVER
271                                             : ZipArchiveOutputStream
272                                             .UnicodeExtraFieldPolicy.ALWAYS);
273 
274             ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
275             if (withExplicitUnicodeExtra
276                 && !zipEncoding.canEncode(ze.getName())) {
277 
278                 ByteBuffer en = zipEncoding.encode(ze.getName());
279 
280                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
281                                                            en.array(),
282                                                            en.arrayOffset(),
283                                                            en.limit()
284                                                            - en.position()));
285             }
286 
287             zos.putArchiveEntry(ze);
288             zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII));
289             zos.closeArchiveEntry();
290 
291             ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
292             if (withExplicitUnicodeExtra
293                 && !zipEncoding.canEncode(ze.getName())) {
294 
295                 ByteBuffer en = zipEncoding.encode(ze.getName());
296 
297                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
298                                                            en.array(),
299                                                            en.arrayOffset(),
300                                                            en.limit()
301                                                            - en.position()));
302             }
303 
304             zos.putArchiveEntry(ze);
305             zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII));
306             zos.closeArchiveEntry();
307 
308             ze = new ZipArchiveEntry(ASCII_TXT);
309 
310             if (withExplicitUnicodeExtra
311                 && !zipEncoding.canEncode(ze.getName())) {
312 
313                 ByteBuffer en = zipEncoding.encode(ze.getName());
314 
315                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
316                                                            en.array(),
317                                                            en.arrayOffset(),
318                                                            en.limit()
319                                                            - en.position()));
320             }
321 
322             zos.putArchiveEntry(ze);
323             zos.write("ascii".getBytes(CharsetNames.US_ASCII));
324             zos.closeArchiveEntry();
325 
326             zos.finish();
327         } finally {
328             if (zos != null) {
329                 try {
330                     zos.close();
331                 } catch (IOException e) { /* swallow */ }
332             }
333         }
334     }
335 
336     private static void testFile(File file, String encoding)
337         throws IOException {
338         ZipFile zf = null;
339         try {
340             zf = new ZipFile(file, encoding, false);
341 
342             Enumeration<ZipArchiveEntry> e = zf.getEntries();
343             while (e.hasMoreElements()) {
344                 ZipArchiveEntry ze = e.nextElement();
345 
346                 if (ze.getName().endsWith("sser.txt")) {
347                     assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
348 
349                 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
350                     assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
351                 } else if (!ze.getName().equals(ASCII_TXT)) {
352                     throw new AssertionError("Unrecognized ZIP entry with name ["
353                                              + ze.getName() + "] found.");
354                 }
355             }
356         } finally {
357             ZipFile.closeQuietly(zf);
358         }
359     }
360 
361     private static UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
362         return (UnicodePathExtraField)
363             ze.getExtraField(UnicodePathExtraField.UPATH_ID);
364     }
365 
366     private static void assertUnicodeName(ZipArchiveEntry ze,
367                                           String expectedName,
368                                           String encoding)
369         throws IOException {
370         if (!expectedName.equals(ze.getName())) {
371             UnicodePathExtraField ucpf = findUniCodePath(ze);
372             assertNotNull(ucpf);
373 
374             ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
375             ByteBuffer ne = enc.encode(ze.getName());
376 
377             CRC32 crc = new CRC32();
378             crc.update(ne.array(), ne.arrayOffset(),
379                        ne.limit() - ne.position());
380 
381             assertEquals(crc.getValue(), ucpf.getNameCRC32());
382             assertEquals(expectedName, new String(ucpf.getUnicodeName(),
383                                                   CharsetNames.UTF_8));
384         }
385     }
386 
387     @Test
388     public void testUtf8Interoperability() throws IOException {
389         File file1 = getFile("utf8-7zip-test.zip");
390         File file2 = getFile("utf8-winzip-test.zip");
391 
392         testFile(file1,CP437);
393         testFile(file2,CP437);
394 
395     }
396 
397     private static void assertRawNameOfAcsiiTxt(ZipArchiveEntry ze) {
398         byte[] b = ze.getRawName();
399         assertNotNull(b);
400         final int len = ASCII_TXT.length();
401         assertEquals(len, b.length);
402         for (int i = 0; i < len; i++) {
403             assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
404         }
405         assertNotSame(b, ze.getRawName());
406     }
407 }
408