1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.compress.archivers.zip;
21
22 import static java.nio.charset.StandardCharsets.UTF_8;
23 import static org.junit.jupiter.api.Assertions.assertEquals;
24 import static org.junit.jupiter.api.Assertions.assertNotNull;
25 import static org.junit.jupiter.api.Assertions.assertNotSame;
26 import static org.junit.jupiter.api.Assertions.fail;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.nio.ByteBuffer;
32 import java.nio.charset.Charset;
33 import java.nio.charset.StandardCharsets;
34 import java.nio.file.Files;
35 import java.util.zip.CRC32;
36
37 import org.apache.commons.compress.AbstractTest;
38 import org.junit.jupiter.api.Test;
39
40 class UTF8ZipFilesTest extends AbstractTest {
41
42 private static final String CP437 = "cp437";
43 private static final String ASCII_TXT = "ascii.txt";
44 private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
45 private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
46
47 private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) {
48 final byte[] b = ze.getRawName();
49 assertNotNull(b);
50 final int len = ASCII_TXT.length();
51 assertEquals(len, b.length);
52 for (int i = 0; i < len; i++) {
53 assertEquals((byte) ASCII_TXT.charAt(i), b[i], "Byte " + i);
54 }
55 assertNotSame(b, ze.getRawName());
56 }
57
58 private static void assertUnicodeName(final ZipArchiveEntry ze, final String expectedName, final String encoding) throws IOException {
59 if (!expectedName.equals(ze.getName())) {
60 final UnicodePathExtraField ucpf = findUniCodePath(ze);
61 assertNotNull(ucpf);
62
63 final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
64 final ByteBuffer ne = enc.encode(ze.getName());
65
66 final CRC32 crc = new CRC32();
67 crc.update(ne.array(), ne.arrayOffset(), ne.limit() - ne.position());
68
69 assertEquals(crc.getValue(), ucpf.getNameCRC32());
70 assertEquals(expectedName, new String(ucpf.getUnicodeName(), UTF_8));
71 }
72 }
73
74 private static void createTestFile(final File file, final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra)
75 throws IOException {
76
77 final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
78
79 try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(file)) {
80 zos.setEncoding(encoding);
81 zos.setUseLanguageEncodingFlag(withEFS);
82 zos.setCreateUnicodeExtraFields(
83 withExplicitUnicodeExtra ? ZipArchiveOutputStream.UnicodeExtraFieldPolicy.NEVER : ZipArchiveOutputStream.UnicodeExtraFieldPolicy.ALWAYS);
84
85 ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
86 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
87
88 final ByteBuffer en = zipEncoding.encode(ze.getName());
89
90 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
91 }
92
93 zos.putArchiveEntry(ze);
94 zos.writeUsAscii("Hello, world!");
95 zos.closeArchiveEntry();
96
97 ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
98 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
99
100 final ByteBuffer en = zipEncoding.encode(ze.getName());
101
102 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
103 }
104
105 zos.putArchiveEntry(ze);
106 zos.writeUsAscii("Give me your money!");
107 zos.closeArchiveEntry();
108
109 ze = new ZipArchiveEntry(ASCII_TXT);
110
111 if (withExplicitUnicodeExtra && !zipEncoding.canEncode(ze.getName())) {
112
113 final ByteBuffer en = zipEncoding.encode(ze.getName());
114
115 ze.addExtraField(new UnicodePathExtraField(ze.getName(), en.array(), en.arrayOffset(), en.limit() - en.position()));
116 }
117
118 zos.putArchiveEntry(ze);
119 zos.writeUsAscii("ascii");
120 zos.closeArchiveEntry();
121
122 zos.finish();
123 }
124 }
125
126 private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) {
127 return (UnicodePathExtraField) ze.getExtraField(UnicodePathExtraField.UPATH_ID);
128 }
129
130 private static void testFile(final File file, final String encoding) throws IOException {
131 try (ZipFile zipFile = ZipFile.builder().setFile(file).setCharset(encoding).setUseUnicodeExtraFields(false).get()) {
132 zipFile.stream().forEach(ze -> {
133 if (ze.getName().endsWith("sser.txt")) {
134 assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
135 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
136 assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
137 } else if (!ze.getName().equals(ASCII_TXT)) {
138 fail("Unrecognized ZIP entry with name [" + ze.getName() + "] found.");
139 }
140 });
141 }
142 }
143
144 private void assertCanRead(final ZipFile zf, final String fileName) throws IOException {
145 final ZipArchiveEntry entry = zf.getEntry(fileName);
146 assertNotNull(entry, "Entry doesn't exist");
147 try (InputStream is = zf.getInputStream(entry)) {
148 assertNotNull(is, "InputStream is null");
149 is.read();
150 }
151 }
152
153 @Test
154 void testASCIIFileRoundtripExplicitUnicodeExtra() throws IOException {
155 testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, true);
156 }
157
158 @Test
159 void testASCIIFileRoundtripImplicitUnicodeExtra() throws IOException {
160 testFileRoundtrip(StandardCharsets.US_ASCII.name(), false, false);
161 }
162
163 @Test
164 void testCP437FileRoundtripExplicitUnicodeExtra() throws IOException {
165 testFileRoundtrip(CP437, false, true);
166 }
167
168 @Test
169 void testCP437FileRoundtripImplicitUnicodeExtra() throws IOException {
170 testFileRoundtrip(CP437, false, false);
171 }
172
173 private void testFileRoundtrip(final String encoding, final boolean withEFS, final boolean withExplicitUnicodeExtra) throws IOException {
174 final File file = createTempFile(encoding + "-test", ".zip");
175 createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
176 testFile(file, encoding);
177 }
178
179 @Test
180 void testRawNameReadFromStream() throws IOException {
181 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
182 assertRawNameOfAcsiiTxt(zi.getNextEntry());
183 }
184 }
185
186 @Test
187 void testRawNameReadFromZipFile() throws IOException {
188 final File archive = getFile("utf8-7zip-test.zip");
189 try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(CP437).setUseUnicodeExtraFields(false).get()) {
190 assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
191 }
192 }
193
194
195
196
197
198
199 @Test
200 void testRead7ZipArchive() throws IOException {
201 final File archive = getFile("utf8-7zip-test.zip");
202 try (ZipFile zf = new ZipFile(archive, CP437, false)) {
203 assertNotNull(zf.getEntry(ASCII_TXT));
204 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
205 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
206 }
207 }
208
209 @Test
210 void testRead7ZipArchiveForStream() throws IOException {
211 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(newInputStream("utf8-7zip-test.zip"), CP437, false)) {
212 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
213 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
214 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
215 }
216 }
217
218
219
220
221 @Test
222 void testReadWinZipArchive() throws IOException {
223 final File archive = getFile("utf8-winzip-test.zip");
224
225 String encoding = null;
226 if (Charset.defaultCharset() != UTF_8) {
227 encoding = UTF_8.name();
228 }
229 try (ZipFile zf = ZipFile.builder().setFile(archive).setCharset(encoding).setUseUnicodeExtraFields(true).get()) {
230 assertCanRead(zf, ASCII_TXT);
231 assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
232 assertCanRead(zf, OIL_BARREL_TXT);
233 }
234 }
235
236 @Test
237 void testReadWinZipArchiveForStream() throws IOException {
238
239 String encoding = null;
240 if (Charset.defaultCharset() != UTF_8) {
241 encoding = UTF_8.name();
242 }
243 try (InputStream archive = newInputStream("utf8-winzip-test.zip");
244 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive, encoding, true)) {
245 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
246 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
247 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
248 }
249 }
250
251
252
253
254 @Test
255 void testStreamSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
256 try (InputStream archive = newInputStream("COMPRESS-479.zip");
257 ZipArchiveInputStream zi = new ZipArchiveInputStream(archive)) {
258 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
259 assertEquals("%U20AC_for_Dollar.txt", zi.getNextEntry().getName());
260 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
261 }
262 }
263
264 @Test
265 void testUtf8FileRoundtripExplicitUnicodeExtra() throws IOException {
266 testFileRoundtrip(StandardCharsets.UTF_8.name(), true, true);
267 }
268
269 @Test
270 void testUtf8FileRoundtripImplicitUnicodeExtra() throws IOException {
271 testFileRoundtrip(StandardCharsets.UTF_8.name(), true, false);
272 }
273
274 @Test
275 void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra() throws IOException {
276 testFileRoundtrip(StandardCharsets.UTF_8.name(), false, true);
277 }
278
279 @Test
280 void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra() throws IOException {
281 testFileRoundtrip(StandardCharsets.UTF_8.name(), false, false);
282 }
283
284 @Test
285 void testUtf8Interoperability() throws IOException {
286 final File file1 = getFile("utf8-7zip-test.zip");
287 final File file2 = getFile("utf8-winzip-test.zip");
288 testFile(file1, CP437);
289 testFile(file2, CP437);
290 }
291
292 @Test
293 void testZipArchiveInputStreamReadsUnicodeFields() throws IOException {
294 final File file = createTempFile("unicode-test", ".zip");
295 createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
296 try (ZipFile zf = ZipFile.builder().setFile(file).setCharset(StandardCharsets.US_ASCII).setUseUnicodeExtraFields(true).get()) {
297 assertNotNull(zf.getEntry(ASCII_TXT));
298 assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
299 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
300 }
301 }
302
303 @Test
304 void testZipFileReadsUnicodeFields() throws IOException {
305 final File file = createTempFile("unicode-test", ".zip");
306 createTestFile(file, StandardCharsets.US_ASCII.name(), false, true);
307 try (ZipArchiveInputStream zi = new ZipArchiveInputStream(Files.newInputStream(file.toPath()), StandardCharsets.US_ASCII.name(), true)) {
308 assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
309 assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
310 assertEquals(ASCII_TXT, zi.getNextEntry().getName());
311 }
312 }
313
314
315
316
317 @Test
318 void testZipFileSkipsOverUnicodeExtraFieldWithUnsupportedVersion() throws IOException {
319 try (ZipFile zf = ZipFile.builder().setFile(getFile("COMPRESS-479.zip")).get()) {
320 assertNotNull(zf.getEntry(ASCII_TXT));
321 assertNotNull(zf.getEntry("%U20AC_for_Dollar.txt"));
322 assertNotNull(zf.getEntry(OIL_BARREL_TXT));
323 }
324 }
325 }