View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.commons.compress.compressors.gzip;
21  
22  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
23  import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
24  import static org.junit.jupiter.api.Assertions.assertEquals;
25  import static org.junit.jupiter.api.Assertions.assertFalse;
26  import static org.junit.jupiter.api.Assertions.assertSame;
27  import static org.junit.jupiter.api.Assertions.assertThrows;
28  import static org.junit.jupiter.api.Assertions.assertTrue;
29  import static org.junit.jupiter.api.Assertions.fail;
30  import static org.junit.jupiter.api.Assumptions.assumeTrue;
31  
32  import java.io.ByteArrayInputStream;
33  import java.io.ByteArrayOutputStream;
34  import java.io.IOException;
35  import java.io.OutputStream;
36  import java.nio.charset.Charset;
37  import java.nio.charset.StandardCharsets;
38  import java.nio.file.Files;
39  import java.nio.file.Path;
40  import java.util.ArrayList;
41  import java.util.zip.GZIPInputStream;
42  import java.util.zip.ZipException;
43  
44  import org.apache.commons.codec.DecoderException;
45  import org.apache.commons.codec.binary.Hex;
46  import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
47  import org.apache.commons.compress.compressors.gzip.GzipParameters.OS;
48  import org.apache.commons.lang3.ArrayFill;
49  import org.junit.jupiter.api.Test;
50  import org.junit.jupiter.params.ParameterizedTest;
51  import org.junit.jupiter.params.provider.CsvSource;
52  
53  import shaded.org.apache.commons.io.IOUtils;
54  
55  /**
56   * Tests {@link GzipCompressorOutputStream}.
57   */
58  class GzipCompressorOutputStreamTest {
59  
60      private static final String EXPECTED_BASE_NAME = "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0";
61      private static final String EXPECTED_FILE_NAME = EXPECTED_BASE_NAME + ".xml";
62  
63      private void testChineseFileName(final String expected, final String sourceFile, final Charset fileNameCharset) throws IOException {
64          final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
65          final byte[] bytes = "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1);
66          Files.write(tempSourceFile, bytes);
67          final Path targetFile = Files.createTempFile(EXPECTED_BASE_NAME, ".gz");
68          final GzipParameters parameters = new GzipParameters();
69          // If your system is Windows with Chinese, and your file name is Chinese, you need set the fileNameCharset to "GBK"
70          // otherwise your file name is different using GzipCompressorOutputStream without a GzipParameters.
71          // On Linux, set the fileNameCharset to UTF-8.
72          parameters.setFileNameCharset(fileNameCharset);
73          assertEquals(fileNameCharset, parameters.getFileNameCharset());
74          parameters.setFileName(EXPECTED_FILE_NAME);
75          parameters.setComment("Comment on " + EXPECTED_FILE_NAME);
76          try (OutputStream fos = Files.newOutputStream(targetFile);
77                  GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
78              gos.write(tempSourceFile);
79          }
80          // Old construction doesn't allow configuration of reading the file name and comment Charset.
81          try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
82              final byte[] fileNameBytes = gis.getMetaData().getFileName().getBytes(StandardCharsets.ISO_8859_1);
83              final String unicodeFileName = new String(fileNameBytes, fileNameCharset);
84              assertEquals(expected, unicodeFileName);
85              assertArrayEquals(bytes, IOUtils.toByteArray(gis));
86          }
87          // Construction allows configuration of reading the file name and comment Charset.
88          // @formatter:off
89          try (GzipCompressorInputStream gis = GzipCompressorInputStream.builder()
90                  .setPath(targetFile)
91                  .setFileNameCharset(fileNameCharset)
92                  .get()) {
93              // @formatter:on
94              final byte[] fileNameBytes = gis.getMetaData().getFileName().getBytes(fileNameCharset);
95              final String unicodeFileName = new String(fileNameBytes, fileNameCharset);
96              assertEquals(expected, unicodeFileName);
97              assertArrayEquals(bytes, IOUtils.toByteArray(gis));
98              // reset trailer values for a simple assertion.
99              gis.getMetaData().setTrailerCrc(0);
100             gis.getMetaData().setTrailerISize(0);
101             assertEquals(parameters, gis.getMetaData());
102         }
103     }
104 
105     /**
106      * Tests Chinese file name for Windows behavior.
107      *
108      * @throws IOException When the test fails.
109      */
110     @Test
111     void testChineseFileNameGBK() throws IOException {
112         assumeTrue(Charset.isSupported("GBK"));
113         testChineseFileName(EXPECTED_FILE_NAME, EXPECTED_FILE_NAME, Charset.forName("GBK"));
114     }
115 
116     /**
117      * Tests Chinese file name for Windows behavior.
118      *
119      * @throws IOException When the test fails.
120      */
121     @Test
122     void testChineseFileNameUTF8() throws IOException {
123         testChineseFileName(EXPECTED_FILE_NAME, EXPECTED_FILE_NAME, StandardCharsets.UTF_8);
124     }
125 
126     /**
127      * Tests the gzip extra header containing subfields.
128      *
129      * @throws IOException When the test has issues with the underlying file system or unexpected gzip operations.
130      */
131     @ParameterizedTest
132     // @formatter:off
133     @CsvSource({
134         "0,    42, false",
135         "1,      , true",
136         "1,     0, false",
137         "1, 65531, false",
138         "1, 65532, true",
139         "2,     0, false",
140         "2, 32764, true",
141         "2, 32763, false"
142     })
143     // @formatter:on
144     void testExtraSubfields(final int subFieldCount, final Integer payloadSize, final boolean shouldFail)
145             throws IOException {
146         final Path tempSourceFile = Files.createTempFile("test_gzip_extra_", ".txt");
147         final Path targetFile = Files.createTempFile("test_gzip_extra_", ".txt.gz");
148         Files.write(tempSourceFile, "Hello World!".getBytes(StandardCharsets.ISO_8859_1));
149         final GzipParameters parameters = new GzipParameters();
150         final ExtraField extra = new ExtraField();
151         boolean failed = false;
152         final byte[][] payloads = new byte[subFieldCount][];
153         for (int i = 0; i < subFieldCount; i++) {
154             if (payloadSize != null) {
155                 payloads[i] = ArrayFill.fill(new byte[payloadSize], (byte) ('a' + i));
156             }
157             try {
158                 extra.addSubField("z" + i, payloads[i]);
159             } catch (final NullPointerException | IOException e) {
160                 failed = true;
161                 break;
162             }
163         }
164         assertEquals(shouldFail, failed, "add subfield " + (shouldFail ? "succes" : "failure") + " was not expected.");
165         if (shouldFail) {
166             return;
167         }
168         if (subFieldCount > 0) {
169             assertThrows(UnsupportedOperationException.class, () -> extra.iterator().remove());
170         }
171         parameters.setExtraField(extra);
172         try (OutputStream fos = Files.newOutputStream(targetFile);
173                 GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
174             gos.write(tempSourceFile);
175             gos.close();
176             assertTrue(gos.isClosed());
177         }
178         try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
179             final ExtraField extra2 = gis.getMetaData().getExtraField();
180             assertEquals(parameters, gis.getMetaData());
181             assertEquals(subFieldCount == 0, extra2.isEmpty());
182             assertEquals(subFieldCount, extra2.size());
183             assertEquals(4 * subFieldCount + subFieldCount * payloadSize, extra2.getEncodedSize());
184             final ArrayList<SubField> listCopy = new ArrayList<>();
185             extra2.forEach(listCopy::add);
186             assertEquals(subFieldCount, listCopy.size());
187             for (int i = 0; i < subFieldCount; i++) {
188                 final SubField sf = extra2.getSubField(i);
189                 assertSame(sf, listCopy.get(i));
190                 assertSame(sf, extra2.findFirstSubField("z" + i));
191                 assertEquals("z" + i, sf.getId()); // id was saved/loaded correctly
192                 assertArrayEquals(payloads[i], sf.getPayload(), "field " + i + " has wrong payload");
193             }
194             extra2.clear();
195             assertTrue(extra2.isEmpty());
196         }
197     }
198 
199     @Test
200     void testExtraSubfieldsEmpty() {
201         final ExtraField extra = new ExtraField();
202         assertEquals(0, extra.toByteArray().length);
203         assertFalse(extra.iterator().hasNext());
204         extra.forEach(e -> fail("Not empty."));
205         assertThrows(IndexOutOfBoundsException.class, () -> extra.getSubField(0));
206     }
207 
208     private void testFileName(final String expected, final String sourceFile) throws IOException {
209         final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
210         final byte[] bytes = "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1);
211         Files.write(tempSourceFile, bytes);
212         final Path targetFile = Files.createTempFile("test", ".gz");
213         final GzipParameters parameters = new GzipParameters();
214         parameters.setFilename(sourceFile);
215         assertEquals(parameters.getFilename(), parameters.getFileName());
216         parameters.setFileName(sourceFile);
217         assertEquals(parameters.getFilename(), parameters.getFileName());
218         try (OutputStream fos = Files.newOutputStream(targetFile);
219                 GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
220             gos.write(tempSourceFile);
221         }
222         try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
223             assertEquals(expected, gis.getMetaData().getFileName());
224             assertEquals(expected, gis.getMetaData().getFilename());
225             assertArrayEquals(bytes, IOUtils.toByteArray(gis));
226         }
227     }
228 
229     @Test
230     void testFileNameAscii() throws IOException {
231         testFileName("ASCII.xml", "ASCII.xml");
232     }
233 
234     /**
235      * Tests COMPRESS-638. Use {@link GzipParameters#setFileNameCharset(Charset)} if you want non-ISO-8859-1 characters.
236      *
237      * GZip RFC requires ISO 8859-1 (LATIN-1).
238      *
239      * @throws IOException When the test fails.
240      */
241     @Test
242     void testFileNameChinesePercentEncoded() throws IOException {
243         // "Test Chinese name"
244         testFileName("??????.xml", EXPECTED_FILE_NAME);
245     }
246 
247     /**
248      * Tests the gzip header CRC.
249      *
250      * @throws IOException When the test has issues with the underlying file system or unexpected gzip operations.
251      */
252     @Test
253     void testHeaderCrc() throws IOException, DecoderException {
254         final GzipParameters parameters = new GzipParameters();
255         parameters.setHeaderCRC(true);
256         parameters.setModificationTime(0x66554433); // avoid changing time
257         parameters.setFileName("AAAA");
258         parameters.setComment("ZZZZ");
259         parameters.setOS(OS.UNIX);
260         final ExtraField extra = new ExtraField();
261         extra.addSubField("BB", "CCCC".getBytes(StandardCharsets.ISO_8859_1));
262         parameters.setExtraField(extra);
263         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
264         try (GzipCompressorOutputStream gos = new GzipCompressorOutputStream(baos, parameters)) {
265             // nothing to write for this test.
266         }
267         final byte[] result = baos.toByteArray();
268         final byte[] expected = Hex.decodeHex("1f8b" // id1 id2
269                 + "08" // cm
270                 + "1e" // flg(FEXTRA|FNAME|FCOMMENT|FHCRC)
271                 + "33445566" // mtime little endian
272                 + "00" + "03" // xfl os
273                 + "0800" + "4242" + "0400" + "43434343" // xlen sfid sflen "CCCC"
274                 + "4141414100" // "AAAA" with \0
275                 + "5a5a5a5a00" // "ZZZZ" with \0
276                 + "d842" // crc32 = 839242d8
277                 + "0300" // empty deflate stream
278                 + "00000000" // crs32
279                 + "00000000" // isize
280         );
281         assertArrayEquals(expected, result);
282         assertDoesNotThrow(() -> {
283             try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) {
284                 // if it does not fail, the hcrc is good.
285             }
286         });
287         try (GzipCompressorInputStream gis = new GzipCompressorInputStream(new ByteArrayInputStream(result))) {
288             final GzipParameters metaData = gis.getMetaData();
289             assertTrue(metaData.getHeaderCRC());
290             assertEquals(0x66554433, metaData.getModificationTime());
291             assertEquals(1, metaData.getExtraField().size());
292             final SubField sf = metaData.getExtraField().iterator().next();
293             assertEquals("BB", sf.getId());
294             assertEquals("CCCC", new String(sf.getPayload(), StandardCharsets.ISO_8859_1));
295             assertEquals("AAAA", metaData.getFileName());
296             assertEquals("ZZZZ", metaData.getComment());
297             assertEquals(OS.UNIX, metaData.getOS());
298             assertEquals(parameters, metaData);
299         }
300         // verify that the constructor normally fails on bad HCRC
301         assertThrows(ZipException.class, () -> {
302             result[30] = 0x77; // corrupt the low byte of header CRC
303             try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) {
304                 // if it does not fail, the hcrc is good.
305             }
306         }, "Header CRC verification is no longer feasible with JDK classes. The earlier assertion would have passed despite a bad header CRC.");
307     }
308 
309 }