1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.compress.archivers.zip;
19
20 import static java.nio.charset.StandardCharsets.US_ASCII;
21 import static java.nio.charset.StandardCharsets.UTF_8;
22 import static org.junit.jupiter.api.Assertions.assertEquals;
23 import static org.junit.jupiter.api.Assertions.assertNotNull;
24 import static org.junit.jupiter.api.Assertions.assertNotSame;
25 import static org.junit.jupiter.api.Assertions.fail;
26
27 import java.io.File;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.nio.ByteBuffer;
31 import java.nio.charset.Charset;
32 import java.nio.charset.StandardCharsets;
33 import java.nio.file.Files;
34 import java.util.Enumeration;
35 import java.util.zip.CRC32;
36
37 import org.apache.commons.compress.AbstractTest;
38 import org.junit.jupiter.api.Test;
39
40 public class UTF8ZipFilesTest extends AbstractTest {
41
42 private static final String CP437 = "cp437";
43 private static final String ASCII_TXT = "ascii.txt";
44 private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
45 private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
46
47 private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) {
48 final byte[] b = ze.getRawName();
49 assertNotNull(b);
50 final int len = ASCII_TXT.length();
51 assertEquals(len, b.length);
52 for (int i = 0; i < len; i++) {
53 assertEquals((byte) ASCII_TXT.charAt(i), b[i], "Byte " + i);
54 }
55 assertNotSame(b, ze.getRawName());
56 }
57
58 private static void assertUnicodeName(final ZipArchiveEntry ze, final String expectedName, final String encoding) throws IOException {
59 if (!expectedName.equals(ze.getName())) {
60 final UnicodePathExtraField ucpf = findUniCodePath(ze);
61 assertNotNull(ucpf);
62
63 final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
64 final ByteBuffer ne = enc.encode(ze.getName());
65
66 final CRC32 crc = new CRC32();
67 crc.update(ne.array(), ne.arrayOffset(), ne.limit() - ne.position());
68
69 assertEquals(crc.getValue(), ucpf.getNameCRC32());
70 assertEquals(expectedName, new String(ucpf.getUnicodeName(), UTF_8));
71 }
72 }
73
74 private static void createTestFile(final File file, final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra)
75 throws IOException {
76
77 final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
78
79 try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file)) {
80 zos.setEncoding(encoding);
81 zos.setUseLanguageEncodingFlag(withEFS);
82 zos.setCreateUnicodeExtraFields(
83 withExplicitUnicodeExtra ? ZipArchiveOutputStream.UnicodeExtraFieldPolicy.NEVER : ZipArchiveOutputStream.UnicodeExtraFieldPolicy.ALWAYS);
84
85 ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
86 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
87
88 final ByteBuffer en = zipEncoding.encode(ze.getName());
89
90 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
91 }
92
93 zos.putArchiveEntry(ze);
94 zos.write("Hello, world!".getBytes(US_ASCII));
95 zos.closeArchiveEntry();
96
97 ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
98 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
99
100 final ByteBuffer en = zipEncoding.encode(ze.getName());
101
102 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
103 }
104
105 zos.putArchiveEntry(ze);
106 zos.write("Give me your money!".getBytes(US_ASCII));
107 zos.closeArchiveEntry();
108
109 ze = new ZipArchiveEntry(ASCII_TXT);
110
111 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
112
113 final ByteBuffer en = zipEncoding.encode(ze.getName());
114
115 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
116 }
117
118 zos.putArchiveEntry(ze);
119 zos.write("ascii".getBytes(US_ASCII));
120 zos.closeArchiveEntry();
121
122 zos.finish();
123 }
124 }
125
126 private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) {
127 return (UnicodePathExtraField) ze.getExtraField(UnicodePathExtraField.UPATH_ID);
128 }
129
130 private static void testFile(final File file, final String encoding) throws IOException {
131 try (ZipFile zf = ZipFile.builder().setFile(file).setCharset(encoding).setUseUnicodeExtraFields(false).get()) {
132 final Enumeration<ZipArchiveEntry> e = zf.getEntries();
133 while (e.hasMoreElements()) {
134 final ZipArchiveEntry ze = e.nextElement();
135 if (ze.getName().endsWith("sser.txt")) {
136 assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
137 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
138 assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
139 } else if (!ze.getName().equals(ASCII_TXT)) {
140 fail("Unrecognized ZIP entry with name [" + ze.getName() + "] found.");
141 }
142 }
143 }
144 }
145
146 private void assertCanRead(final ZipFile zf, final String fileName) throws IOException {
147 final ZipArchiveEntry entry = zf.getEntry(fileName);
148 assertNotNull(entry, "Entry doesn't exist");
149 try (InputStream is = zf.getInputStream(entry)) {
150 assertNotNull(is, "InputStream is null");
151 is.read();
152 }
153 }
154
155 @Test
156 public void testASCIIFileRoundtripExplicitUnicodeExtra() throws IOException {
157 testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, true);
158 }
159
160 @Test
161 public void testASCIIFileRoundtripImplicitUnicodeExtra() throws IOException {
162 testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, false);
163 }
164
165 @Test
166 public void testCP437FileRoundtripExplicitUnicodeExtra() throws IOException {
167 testFileRoundtrip(CP437, false, true);
168 }
169
170 @Test
171 public void testCP437FileRoundtripImplicitUnicodeExtra() throws IOException {
172 testFileRoundtrip(CP437, false, false);
173 }
174
175 private void testFileRoundtrip(final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra) throws IOException {
176 final File file = createTempFile(encoding + "-test", ".zip");
177 createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
178 testFile(file, encoding);
179 }
180
181 @Test
182 public void testRawNameReadFromStream() throws IOException {
183 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
184 assertRawNameOfAcsiiTxt(zi.getNextEntry());
185 }
186 }
187
188 @Test
189 public void testRawNameReadFromZipFile() throws IOException {
190 final File archive = getFile("utf8-7zip-test.zip");
191 try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(CP437).setUseUnicodeExtraFields(false).get()) {
192 assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
193 }
194 }
195
196
197
198
199
200
201 @Test
202 public void testRead7ZipArchive() throws IOException {
203 final File archive = getFile("utf8-7zip-test.zip");
204 try (ZipFile zf = new ZipFile(archive, CP437, false)) {
205 assertNotNull(zf.getEntry(ASCII_TXT));
206 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
207 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
208 }
209 }
210
211 @Test
212 public void testRead7ZipArchiveForStream() throws IOException {
213 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
214 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
215 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
216 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
217 }
218 }
219
220
221
222
223 @Test
224 public void testReadWinZipArchive() throws IOException {
225 final File archive = getFile("utf8-winzip-test.zip");
226
227 String encoding = null;
228 if (Charset.defaultCharset() != UTF_8) {
229 encoding = UTF_8.name();
230 }
231 try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(encoding).setUseUnicodeExtraFields(true).get()) {
232 assertCanRead(zf, ASCII_TXT);
233 assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
234 assertCanRead(zf, OIL_BARREL_TXT);
235 }
236 }
237
238 @Test
239 public void testReadWinZipArchiveForStream() throws IOException {
240
241 String encoding = null;
242 if (Charset.defaultCharset() != UTF_8) {
243 encoding = UTF_8.name();
244 }
245 try (InputStream archive = newInputStream("utf8-winzip-test.zip");
246 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive, encoding, true)) {
247 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
248 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
249 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
250 }
251 }
252
253
254
255
256 @Test
257 public void testStreamSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
258 try (InputStream archive = newInputStream("COMPRESS-479.zip");
259 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive)) {
260 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
261 assertEquals("%U20AC_for_Dollar.txt", zi.getNextEntry().getName());
262 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
263 }
264 }
265
266 @Test
267 public void testUtf8FileRoundtripExplicitUnicodeExtra() throws IOException {
268 testFileRoundtrip(StandardCharsets.UTF_8.name(), true, true);
269 }
270
271 @Test
272 public void testUtf8FileRoundtripImplicitUnicodeExtra() throws IOException {
273 testFileRoundtrip(StandardCharsets.UTF_8.name(), true, false);
274 }
275
276 @Test
277 public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra() throws IOException {
278 testFileRoundtrip(StandardCharsets.UTF_8.name(), false, true);
279 }
280
281 @Test
282 public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra() throws IOException {
283 testFileRoundtrip(StandardCharsets.UTF_8.name(), false, false);
284 }
285
286 @Test
287 public void testUtf8Interoperability() throws IOException {
288 final File file1 = getFile("utf8-7zip-test.zip");
289 final File file2 = getFile("utf8-winzip-test.zip");
290 testFile(file1, CP437);
291 testFile(file2, CP437);
292 }
293
294 @Test
295 public void testZipArchiveInputStreamReadsUnicodeFields() throws IOException {
296 final File file = createTempFile("unicode-test", ".zip");
297 createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
298 try (ZipFile zf = ZipFile.builder().setFile(file).setCharset(StandardCharsets.US_ASCII).setUseUnicodeExtraFields(true).get()) {
299 assertNotNull(zf.getEntry(ASCII_TXT));
300 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
301 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
302 }
303 }
304
305 @Test
306 public void testZipFileReadsUnicodeFields() throws IOException {
307 final File file = createTempFile("unicode-test", ".zip");
308 createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
309 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(Files.newInputStream(file.toPath()), StandardCharsets.US_ASCII.name(), true)) {
310 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
311 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
312 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
313 }
314 }
315
316
317
318
319 @Test
320 public void testZipFileSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
321 try (ZipFile zf = ZipFile.builder().setFile(getFile("COMPRESS-479.zip")).get()) {
322 assertNotNull(zf.getEntry(ASCII_TXT));
323 assertNotNull(zf.getEntry("%U20AC_for_Dollar.txt"));
324 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
325 }
326 }
327 }