1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.archivers.zip;
20
21 import java.io.File;
22 import java.io.FileInputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.UnsupportedEncodingException;
26 import java.nio.ByteBuffer;
27 import java.util.Enumeration;
28 import java.util.zip.CRC32;
29
30 import org.apache.commons.compress.AbstractTestCase;
31 import org.apache.commons.compress.utils.CharsetNames;
32
33 public class UTF8ZipFilesTest extends AbstractTestCase {
34
35 private static final String CP437 = "cp437";
36 private static final String ASCII_TXT = "ascii.txt";
37 private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
38 private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
39
40 public void testUtf8FileRoundtripExplicitUnicodeExtra()
41 throws IOException {
42 testFileRoundtrip(CharsetNames.UTF_8, true, true);
43 }
44
45 public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
46 throws IOException {
47 testFileRoundtrip(CharsetNames.UTF_8, false, true);
48 }
49
50 public void testCP437FileRoundtripExplicitUnicodeExtra()
51 throws IOException {
52 testFileRoundtrip(CP437, false, true);
53 }
54
55 public void testASCIIFileRoundtripExplicitUnicodeExtra()
56 throws IOException {
57 testFileRoundtrip(CharsetNames.US_ASCII, false, true);
58 }
59
60 public void testUtf8FileRoundtripImplicitUnicodeExtra()
61 throws IOException {
62 testFileRoundtrip(CharsetNames.UTF_8, true, false);
63 }
64
65 public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
66 throws IOException {
67 testFileRoundtrip(CharsetNames.UTF_8, false, false);
68 }
69
70 public void testCP437FileRoundtripImplicitUnicodeExtra()
71 throws IOException {
72 testFileRoundtrip(CP437, false, false);
73 }
74
75 public void testASCIIFileRoundtripImplicitUnicodeExtra()
76 throws IOException {
77 testFileRoundtrip(CharsetNames.US_ASCII, false, false);
78 }
79
80
81
82
83
84
85
86 public void testRead7ZipArchive() throws IOException {
87 File archive = getFile("utf8-7zip-test.zip");
88 ZipFile zf = null;
89 try {
90 zf = new ZipFile(archive, CP437, false);
91 assertNotNull(zf.getEntry(ASCII_TXT));
92 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
93 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
94 } finally {
95 ZipFile.closeQuietly(zf);
96 }
97 }
98
99 public void testRead7ZipArchiveForStream() throws IOException {
100 FileInputStream archive =
101 new FileInputStream(getFile("utf8-7zip-test.zip"));
102 ZipArchiveInputStream zi = null;
103 try {
104 zi = new ZipArchiveInputStream(archive, CP437, false);
105 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
106 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
107 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
108 } finally {
109 if (zi != null) {
110 zi.close();
111 }
112 }
113 }
114
115
116
117
118
119 public void testReadWinZipArchive() throws IOException {
120 File archive = getFile("utf8-winzip-test.zip");
121 ZipFile zf = null;
122 try {
123 zf = new ZipFile(archive, null, true);
124 assertCanRead(zf, ASCII_TXT);
125 assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
126 assertCanRead(zf, OIL_BARREL_TXT);
127 } finally {
128 ZipFile.closeQuietly(zf);
129 }
130 }
131
132 private void assertCanRead(ZipFile zf, String fileName) throws IOException {
133 ZipArchiveEntry entry = zf.getEntry(fileName);
134 assertNotNull("Entry doesn't exist", entry);
135 InputStream is = zf.getInputStream(entry);
136 assertNotNull("InputStream is null", is);
137 try {
138 is.read();
139 } finally {
140 is.close();
141 }
142 }
143
144 public void testReadWinZipArchiveForStream() throws IOException {
145 FileInputStream archive =
146 new FileInputStream(getFile("utf8-winzip-test.zip"));
147 ZipArchiveInputStream zi = null;
148 try {
149 zi = new ZipArchiveInputStream(archive, null, true);
150 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
151 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
152 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
153 } finally {
154 if (zi != null) {
155 zi.close();
156 }
157 }
158 }
159
160 public void testZipFileReadsUnicodeFields() throws IOException {
161 File file = File.createTempFile("unicode-test", ".zip");
162 file.deleteOnExit();
163 ZipArchiveInputStream zi = null;
164 try {
165 createTestFile(file, CharsetNames.US_ASCII, false, true);
166 FileInputStream archive = new FileInputStream(file);
167 zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true);
168 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
169 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
170 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
171 } finally {
172 if (zi != null) {
173 zi.close();
174 }
175 tryHardToDelete(file);
176 }
177 }
178
179 public void testZipArchiveInputStreamReadsUnicodeFields()
180 throws IOException {
181 File file = File.createTempFile("unicode-test", ".zip");
182 file.deleteOnExit();
183 ZipFile zf = null;
184 try {
185 createTestFile(file, CharsetNames.US_ASCII, false, true);
186 zf = new ZipFile(file, CharsetNames.US_ASCII, true);
187 assertNotNull(zf.getEntry(ASCII_TXT));
188 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
189 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
190 } finally {
191 ZipFile.closeQuietly(zf);
192 tryHardToDelete(file);
193 }
194 }
195
196 public void testRawNameReadFromZipFile()
197 throws IOException {
198 File archive = getFile("utf8-7zip-test.zip");
199 ZipFile zf = null;
200 try {
201 zf = new ZipFile(archive, CP437, false);
202 assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
203 } finally {
204 ZipFile.closeQuietly(zf);
205 }
206 }
207
208 public void testRawNameReadFromStream()
209 throws IOException {
210 FileInputStream archive =
211 new FileInputStream(getFile("utf8-7zip-test.zip"));
212 ZipArchiveInputStream zi = null;
213 try {
214 zi = new ZipArchiveInputStream(archive, CP437, false);
215 assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry());
216 } finally {
217 if (zi != null) {
218 zi.close();
219 }
220 }
221 }
222
223 private static void testFileRoundtrip(String encoding, boolean withEFS,
224 boolean withExplicitUnicodeExtra)
225 throws IOException {
226
227 File file = File.createTempFile(encoding + "-test", ".zip");
228 file.deleteOnExit();
229 try {
230 createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
231 testFile(file, encoding);
232 } finally {
233 tryHardToDelete(file);
234 }
235 }
236
237 private static void createTestFile(File file, String encoding,
238 boolean withEFS,
239 boolean withExplicitUnicodeExtra)
240 throws UnsupportedEncodingException, IOException {
241
242 ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
243
244 ZipArchiveOutputStream zos = null;
245 try {
246 zos = new ZipArchiveOutputStream(file);
247 zos.setEncoding(encoding);
248 zos.setUseLanguageEncodingFlag(withEFS);
249 zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ?
250 ZipArchiveOutputStream
251 .UnicodeExtraFieldPolicy.NEVER
252 : ZipArchiveOutputStream
253 .UnicodeExtraFieldPolicy.ALWAYS);
254
255 ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
256 if (withExplicitUnicodeExtra
257 && !zipEncoding.canEncode(ze.getName())) {
258
259 ByteBuffer en = zipEncoding.encode(ze.getName());
260
261 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
262 en.array(),
263 en.arrayOffset(),
264 en.limit()
265 - en.position()));
266 }
267
268 zos.putArchiveEntry(ze);
269 zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII));
270 zos.closeArchiveEntry();
271
272 ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
273 if (withExplicitUnicodeExtra
274 && !zipEncoding.canEncode(ze.getName())) {
275
276 ByteBuffer en = zipEncoding.encode(ze.getName());
277
278 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
279 en.array(),
280 en.arrayOffset(),
281 en.limit()
282 - en.position()));
283 }
284
285 zos.putArchiveEntry(ze);
286 zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII));
287 zos.closeArchiveEntry();
288
289 ze = new ZipArchiveEntry(ASCII_TXT);
290
291 if (withExplicitUnicodeExtra
292 && !zipEncoding.canEncode(ze.getName())) {
293
294 ByteBuffer en = zipEncoding.encode(ze.getName());
295
296 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
297 en.array(),
298 en.arrayOffset(),
299 en.limit()
300 - en.position()));
301 }
302
303 zos.putArchiveEntry(ze);
304 zos.write("ascii".getBytes(CharsetNames.US_ASCII));
305 zos.closeArchiveEntry();
306
307 zos.finish();
308 } finally {
309 if (zos != null) {
310 try {
311 zos.close();
312 } catch (IOException e) {
313 }
314 }
315 }
316
317 private static void testFile(File file, String encoding)
318 throws IOException {
319 ZipFile zf = null;
320 try {
321 zf = new ZipFile(file, encoding, false);
322
323 Enumeration<ZipArchiveEntry> e = zf.getEntries();
324 while (e.hasMoreElements()) {
325 ZipArchiveEntry ze = e.nextElement();
326
327 if (ze.getName().endsWith("sser.txt")) {
328 assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
329
330 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
331 assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
332 } else if (!ze.getName().equals(ASCII_TXT)) {
333 throw new AssertionError("Urecognized ZIP entry with name ["
334 + ze.getName() + "] found.");
335 }
336 }
337 } finally {
338 ZipFile.closeQuietly(zf);
339 }
340 }
341
342 private static UnicodePathExtraField findUniCodePath(ZipArchiveEntry ze) {
343 return (UnicodePathExtraField)
344 ze.getExtraField(UnicodePathExtraField.UPATH_ID);
345 }
346
347 private static void assertUnicodeName(ZipArchiveEntry ze,
348 String expectedName,
349 String encoding)
350 throws IOException {
351 if (!expectedName.equals(ze.getName())) {
352 UnicodePathExtraField ucpf = findUniCodePath(ze);
353 assertNotNull(ucpf);
354
355 ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
356 ByteBuffer ne = enc.encode(ze.getName());
357
358 CRC32 crc = new CRC32();
359 crc.update(ne.array(), ne.arrayOffset(),
360 ne.limit() - ne.position());
361
362 assertEquals(crc.getValue(), ucpf.getNameCRC32());
363 assertEquals(expectedName, new String(ucpf.getUnicodeName(),
364 CharsetNames.UTF_8));
365 }
366 }
367
368 public void testUtf8Interoperability() throws IOException {
369 File file1 = super.getFile("utf8-7zip-test.zip");
370 File file2 = super.getFile("utf8-winzip-test.zip");
371
372 testFile(file1,CP437);
373 testFile(file2,CP437);
374
375 }
376
377 private static void assertRawNameOfAcsiiTxt(ZipArchiveEntry ze) {
378 byte[] b = ze.getRawName();
379 assertNotNull(b);
380 final int len = ASCII_TXT.length();
381 assertEquals(len, b.length);
382 for (int i = 0; i < len; i++) {
383 assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
384 }
385 assertNotSame(b, ze.getRawName());
386 }
387 }
388