/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
18  
19  package org.apache.commons.compress.archivers.zip;
20  
21  import java.io.File;
22  import java.io.FileInputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.UnsupportedEncodingException;
26  import java.nio.ByteBuffer;
27  import java.util.Enumeration;
28  import java.util.zip.CRC32;
29  
30  import org.apache.commons.compress.AbstractTestCase;
31  import org.apache.commons.compress.utils.CharsetNames;
32  
33  public class UTF8ZipFilesTest extends AbstractTestCase {
34  
35      private static final String CP437 = "cp437";
36      private static final String ASCII_TXT = "ascii.txt";
37      private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
38      private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
39  
40      public void testUtf8FileRoundtripExplicitUnicodeExtra()
41          throws IOException {
42          testFileRoundtrip(CharsetNames.UTF_8, true, true);
43      }
44  
45      public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
46          throws IOException {
47          testFileRoundtrip(CharsetNames.UTF_8, false, true);
48      }
49  
50      public void testCP437FileRoundtripExplicitUnicodeExtra()
51          throws IOException {
52          testFileRoundtrip(CP437, false, true);
53      }
54  
55      public void testASCIIFileRoundtripExplicitUnicodeExtra()
56          throws IOException {
57          testFileRoundtrip(CharsetNames.US_ASCII, false, true);
58      }
59  
60      public void testUtf8FileRoundtripImplicitUnicodeExtra()
61          throws IOException {
62          testFileRoundtrip(CharsetNames.UTF_8, true, false);
63      }
64  
65      public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
66          throws IOException {
67          testFileRoundtrip(CharsetNames.UTF_8, false, false);
68      }
69  
70      public void testCP437FileRoundtripImplicitUnicodeExtra()
71          throws IOException {
72          testFileRoundtrip(CP437, false, false);
73      }
74  
75      public void testASCIIFileRoundtripImplicitUnicodeExtra()
76          throws IOException {
77          testFileRoundtrip(CharsetNames.US_ASCII, false, false);
78      }
79  
80      /*
81       * 7-ZIP created archive, uses EFS to signal UTF-8 filenames.
82       *
83       * 7-ZIP doesn't use EFS for strings that can be encoded in CP437
84       * - which is true for OIL_BARREL_TXT.
85       */
86      public void testRead7ZipArchive() throws IOException {
87          File archive = getFile("utf8-7zip-test.zip");
88          ZipFile zf = null;
89          try {
90              zf = new ZipFile(archive, CP437, false);
91              assertNotNull(zf.getEntry(ASCII_TXT));
92              assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
93              assertNotNull(zf.getEntry(OIL_BARREL_TXT));
94          } finally {
95              ZipFile.closeQuietly(zf);
96          }
97      }
98  
99      public void testRead7ZipArchiveForStream() throws IOException {
100         FileInputStream archive =
101             new FileInputStream(getFile("utf8-7zip-test.zip"));
102         ZipArchiveInputStream zi = null;
103         try {
104             zi = new ZipArchiveInputStream(archive, CP437, false);
105             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
106             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
107             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
108         } finally {
109             if (zi != null) {
110                 zi.close();
111             }
112         }
113     }
114 
115     /*
116      * WinZIP created archive, uses Unicode Extra Fields but only in
117      * the central directory.
118      */
119     public void testReadWinZipArchive() throws IOException {
120         File archive = getFile("utf8-winzip-test.zip");
121         ZipFile zf = null;
122         try {
123             zf = new ZipFile(archive, null, true);
124             assertCanRead(zf, ASCII_TXT);
125             assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
126             assertCanRead(zf, OIL_BARREL_TXT);
127         } finally {
128             ZipFile.closeQuietly(zf);
129         }
130     }
131 
132     private void assertCanRead(ZipFile zf, String fileName) throws IOException {
133         ZipArchiveEntry entry = zf.getEntry(fileName);
134         assertNotNull("Entry doesn't exist", entry);
135         InputStream is = zf.getInputStream(entry);
136         assertNotNull("InputStream is null", is);
137         try {
138             is.read();
139         } finally {
140             is.close();
141         }
142     }
143 
144     public void testReadWinZipArchiveForStream() throws IOException {
145         FileInputStream archive =
146             new FileInputStream(getFile("utf8-winzip-test.zip"));
147         ZipArchiveInputStream zi = null;
148         try {
149             zi = new ZipArchiveInputStream(archive, null, true);
150             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
151             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
152             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
153         } finally {
154             if (zi != null) {
155                 zi.close();
156             }
157         }
158     }
159 
160     public void testZipFileReadsUnicodeFields() throws IOException {
161         File file = File.createTempFile("unicode-test", ".zip");
162         file.deleteOnExit();
163         ZipArchiveInputStream zi = null;
164         try {
165             createTestFile(file, CharsetNames.US_ASCII, false, true);
166             FileInputStream archive = new FileInputStream(file);
167             zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true);
168             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
169             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
170             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
171         } finally {
172             if (zi != null) {
173                 zi.close();
174             }
175             tryHardToDelete(file);
176         }
177     }
178 
179     public void testZipArchiveInputStreamReadsUnicodeFields()
180         throws IOException {
181         File file = File.createTempFile("unicode-test", ".zip");
182         file.deleteOnExit();
183         ZipFile zf = null;
184         try {
185             createTestFile(file, CharsetNames.US_ASCII, false, true);
186             zf = new ZipFile(file, CharsetNames.US_ASCII, true);
187             assertNotNull(zf.getEntry(ASCII_TXT));
188             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
189             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
190         } finally {
191             ZipFile.closeQuietly(zf);
192             tryHardToDelete(file);
193         }
194     }
195 
196     public void testRawNameReadFromZipFile()
197         throws IOException {
198         File archive = getFile("utf8-7zip-test.zip");
199         ZipFile zf = null;
200         try {
201             zf = new ZipFile(archive, CP437, false);
202             assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
203         } finally {
204             ZipFile.closeQuietly(zf);
205         }
206     }
207 
208     public void testRawNameReadFromStream()
209         throws IOException {
210         FileInputStream archive =
211             new FileInputStream(getFile("utf8-7zip-test.zip"));
212         ZipArchiveInputStream zi = null;
213         try {
214             zi = new ZipArchiveInputStream(archive, CP437, false);
215             assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry());
216         } finally {
217             if (zi != null) {
218                 zi.close();
219             }
220         }
221     }
222 
223     private static void testFileRoundtrip(String encoding, boolean withEFS,
224                                           boolean withExplicitUnicodeExtra)
225         throws IOException {
226 
227         File file = File.createTempFile(encoding + "-test", ".zip");
228         file.deleteOnExit();
229         try {
230             createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
231             testFile(file, encoding);
232         } finally {
233             tryHardToDelete(file);
234         }
235     }
236 
237     private static void createTestFile(File file, String encoding,
238                                        boolean withEFS,
239                                        boolean withExplicitUnicodeExtra)
240         throws UnsupportedEncodingException, IOException {
241 
242         ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
243 
244         ZipArchiveOutputStream zos = null;
245         try {
246             zos = new ZipArchiveOutputStream(file);
247             zos.setEncoding(encoding);
248             zos.setUseLanguageEncodingFlag(withEFS);
249             zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ? 
250                                             ZipArchiveOutputStream
251                                             .UnicodeExtraFieldPolicy.NEVER
252                                             : ZipArchiveOutputStream
253                                             .UnicodeExtraFieldPolicy.ALWAYS);
254 
255             ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
256             if (withExplicitUnicodeExtra
257                 && !zipEncoding.canEncode(ze.getName())) {
258 
259                 ByteBuffer en = zipEncoding.encode(ze.getName());
260 
261                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
262                                                            en.array(),
263                                                            en.arrayOffset(),
264                                                            en.limit()
265                                                            - en.position()));
266             }
267 
268             zos.putArchiveEntry(ze);
269             zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII));
270             zos.closeArchiveEntry();
271 
272             ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
273             if (withExplicitUnicodeExtra
274                 && !zipEncoding.canEncode(ze.getName())) {
275 
276                 ByteBuffer en = zipEncoding.encode(ze.getName());
277 
278                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
279                                                            en.array(),
280                                                            en.arrayOffset(),
281                                                            en.limit()
282                                                            - en.position()));
283             }
284 
285             zos.putArchiveEntry(ze);
286             zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII));
287             zos.closeArchiveEntry();
288 
289             ze = new ZipArchiveEntry(ASCII_TXT);
290 
291             if (withExplicitUnicodeExtra
292                 && !zipEncoding.canEncode(ze.getName())) {
293 
294                 ByteBuffer en = zipEncoding.encode(ze.getName());
295 
296                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
297                                                            en.array(),
298                                                            en.arrayOffset(),
299                                                            en.limit()
300                                                            - en.position()));
301             }
302 
303             zos.putArchiveEntry(ze);
304             zos.write("ascii".getBytes(CharsetNames.US_ASCII));
305             zos.closeArchiveEntry();
306 
307             zos.finish();
308         } finally {
309             if (zos != null) {
310                 try {
311                     zos.close();
312                 } catch (IOException e) { /* swallow */ }
313             }
314         }
315     }
316 
317     private static void testFile(File file, String encoding)
318         throws IOException {
319         ZipFile zf = null;
320         try {
321             zf = new ZipFile(file, encoding, false);
322 
323             Enumeration<ZipArchiveEntry> e = zf.getEntries();
324             while (e.hasMoreElements()) {
325                 ZipArchiveEntry ze = e.nextElement();
326 
327                 if (ze.getName().endsWith("sser.txt")) {
328                     assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
329 
330                 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
331                     assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
332                 } else if (!ze.getName().equals(ASCII_TXT)) {
333                     throw new AssertionError("Unrecognized ZIP entry with name ["
334                                              + ze.getName() + "] found.");
335                 }
336             }
337         } finally {
338             ZipFile.closeQuietly(zf);
339         }
340     }
341 
342     private static UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
343         return (UnicodePathExtraField)
344             ze.getExtraField(UnicodePathExtraField.UPATH_ID);
345     }
346 
347     private static void assertUnicodeName(ZipArchiveEntry ze,
348                                           String expectedName,
349                                           String encoding)
350         throws IOException {
351         if (!expectedName.equals(ze.getName())) {
352             UnicodePathExtraField ucpf = findUniCodePath(ze);
353             assertNotNull(ucpf);
354 
355             ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
356             ByteBuffer ne = enc.encode(ze.getName());
357 
358             CRC32 crc = new CRC32();
359             crc.update(ne.array(), ne.arrayOffset(),
360                        ne.limit() - ne.position());
361 
362             assertEquals(crc.getValue(), ucpf.getNameCRC32());
363             assertEquals(expectedName, new String(ucpf.getUnicodeName(),
364                                                   CharsetNames.UTF_8));
365         }
366     }
367 
368     public void testUtf8Interoperability() throws IOException {
369         File file1 = super.getFile("utf8-7zip-test.zip");
370         File file2 = super.getFile("utf8-winzip-test.zip");
371 
372         testFile(file1,CP437);
373         testFile(file2,CP437);
374 
375     }
376 
377     private static void assertRawNameOfAcsiiTxt(ZipArchiveEntry ze) {
378         byte[] b = ze.getRawName();
379         assertNotNull(b);
380         final int len = ASCII_TXT.length();
381         assertEquals(len, b.length);
382         for (int i = 0; i < len; i++) {
383             assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
384         }
385         assertNotSame(b, ze.getRawName());
386     }
387 }
388