View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz77support;
20  
21  import static java.nio.charset.StandardCharsets.US_ASCII;
22  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
23  import static org.junit.jupiter.api.Assertions.assertEquals;
24  import static org.junit.jupiter.api.Assertions.assertThrows;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.List;
30  
31  import org.junit.jupiter.api.Test;
32  
33  public class LZ77CompressorTest {
34  
35      private static final byte[] BLA, SAM, ONE_TO_TEN;
36  
37      static {
38          /*
39           * Example from "An Explanation of the Deflate Algorithm" by "Antaeus Feldspar".
40           *
41           * @see "https://zlib.net/feldspar.html"
42           */
43          BLA = "Blah blah blah blah blah!".getBytes(US_ASCII);
44  
45          /*
46           * Example from Wikipedia article about LZSS. Note the example uses indices instead of offsets.
47           *
48           * @see "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Storer%E2%80%93Szymanski"
49           */
50          SAM = ("I am Sam\n" + "\n" + "Sam I am\n" + "\n" + "That Sam-I-am!\n" + "That Sam-I-am!\n" + "I do not like\n" + "that Sam-I-am!\n" + "\n"
51                  + "Do you like green eggs and ham?\n" + "\n" + "I do not like them, Sam-I-am.\n" + "I do not like green eggs and ham.").getBytes(US_ASCII);
52          ONE_TO_TEN = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
53      }
54  
55      private static void assertBackReference(final int expectedOffset, final int expectedLength, final LZ77Compressor.Block block) {
56          assertEquals(LZ77Compressor.BackReference.class, block.getClass());
57          final LZ77Compressor.BackReference b = (LZ77Compressor.BackReference) block;
58          assertEquals(expectedOffset, b.getOffset());
59          assertEquals(expectedLength, b.getLength());
60      }
61  
62      private static void assertLiteralBlock(final byte[] expectedContent, final LZ77Compressor.Block block) {
63          assertEquals(LZ77Compressor.LiteralBlock.class, block.getClass());
64          assertArrayEquals(expectedContent, ((LZ77Compressor.LiteralBlock) block).getData());
65      }
66  
67      private static void assertLiteralBlock(final String expectedContent, final LZ77Compressor.Block block) {
68          assertLiteralBlock(expectedContent.getBytes(US_ASCII), block);
69      }
70  
71      private static void assertSize(final int expectedSize, final List<LZ77Compressor.Block> blocks) {
72          assertEquals(expectedSize, blocks.size());
73          assertEquals(LZ77Compressor.Block.BlockType.EOD, blocks.get(expectedSize - 1).getType());
74      }
75  
76      private static Parameters newParameters(final int windowSize) {
77          return Parameters.builder(windowSize).build();
78      }
79  
80      private static Parameters newParameters(final int windowSize, final int minBackReferenceLength, final int maxBackReferenceLength, final int maxOffset,
81              final int maxLiteralLength) {
82          return Parameters.builder(windowSize).withMinBackReferenceLength(minBackReferenceLength).withMaxBackReferenceLength(maxBackReferenceLength)
83                  .withMaxOffset(maxOffset).withMaxLiteralLength(maxLiteralLength).tunedForCompressionRatio().build();
84      }
85  
86      private static byte[][] stagger(final byte[] data) {
87          final byte[][] r = new byte[data.length][1];
88          for (int i = 0; i < data.length; i++) {
89              r[i][0] = data[i];
90          }
91          return r;
92      }
93  
94      private List<LZ77Compressor.Block> compress(final Parameters params, final byte[]... chunks) throws IOException {
95          final List<LZ77Compressor.Block> blocks = new ArrayList<>();
96          final LZ77Compressor c = new LZ77Compressor(params, block -> {
97              // System.err.println(block);
98              if (block instanceof LZ77Compressor.LiteralBlock) {
99                  // replace with a real copy of data so tests
100                 // can see the results as they've been when
101                 // the callback has been called
102                 final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
103                 final int len = b.getLength();
104                 block = new LZ77Compressor.LiteralBlock(Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), 0, len);
105             }
106             blocks.add(block);
107         });
108         for (final byte[] chunk : chunks) {
109             c.compress(chunk);
110         }
111         c.finish();
112         return blocks;
113     }
114 
115     @Test
116     public void testBlaExampleSmallerWindowSize() throws IOException {
117         final List<LZ77Compressor.Block> blocks = compress(newParameters(8), BLA);
118         assertSize(6, blocks);
119         assertLiteralBlock("Blah b", blocks.get(0));
120         assertBackReference(5, 7, blocks.get(1));
121         assertBackReference(5, 3, blocks.get(2));
122         assertBackReference(5, 7, blocks.get(3));
123         assertLiteralBlock("h!", blocks.get(4));
124     }
125 
126     @Test
127     public void testBlaExampleWithFullArrayAvailableForCompression() throws IOException {
128         final List<LZ77Compressor.Block> blocks = compress(newParameters(128), BLA);
129         assertSize(4, blocks);
130         assertLiteralBlock("Blah b", blocks.get(0));
131         assertBackReference(5, 18, blocks.get(1));
132         assertLiteralBlock("!", blocks.get(2));
133     }
134 
135     @Test
136     public void testBlaExampleWithPrefill() throws IOException {
137         final List<LZ77Compressor.Block> blocks = new ArrayList<>();
138         final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
139             // System.err.println(block);
140             if (block instanceof LZ77Compressor.LiteralBlock) {
141                 // replace with a real copy of data so tests
142                 // can see the results as they've been when
143                 // the callback has been called
144                 final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
145                 final int len = b.getLength();
146                 block = new LZ77Compressor.LiteralBlock(Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), 0, len);
147             }
148             blocks.add(block);
149         });
150         c.prefill(Arrays.copyOfRange(BLA, 0, 6));
151         c.compress(Arrays.copyOfRange(BLA, 6, BLA.length));
152         c.finish();
153         assertSize(3, blocks);
154         assertBackReference(5, 18, blocks.get(0));
155         assertLiteralBlock("!", blocks.get(1));
156     }
157 
158     @Test
159     public void testBlaExampleWithPrefillBiggerThanWindowSize() throws IOException {
160         final List<LZ77Compressor.Block> blocks = new ArrayList<>();
161         final LZ77Compressor c = new LZ77Compressor(newParameters(4), block -> {
162             // System.err.println(block);
163             if (block instanceof LZ77Compressor.LiteralBlock) {
164                 // replace with a real copy of data so tests
165                 // can see the results as they've been when
166                 // the callback has been called
167                 final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
168                 final int len = b.getLength();
169                 block = new LZ77Compressor.LiteralBlock(Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), 0, len);
170             }
171             blocks.add(block);
172         });
173         c.prefill(Arrays.copyOfRange(BLA, 0, 6));
174         c.compress(Arrays.copyOfRange(BLA, 6, BLA.length));
175         c.finish();
176         assertSize(6, blocks);
177         assertLiteralBlock("lah ", blocks.get(0));
178         assertLiteralBlock("blah", blocks.get(1));
179         assertLiteralBlock(" bla", blocks.get(2));
180         assertLiteralBlock("h bl", blocks.get(3));
181         assertLiteralBlock("ah!", blocks.get(4));
182     }
183 
184     @Test
185     public void testBlaExampleWithShorterBackReferenceLength() throws IOException {
186         final List<LZ77Compressor.Block> blocks = compress(newParameters(128, 3, 5, 0, 0), BLA);
187         assertSize(7, blocks);
188         assertLiteralBlock("Blah b", blocks.get(0));
189         assertBackReference(5, 5, blocks.get(1));
190         assertBackReference(5, 5, blocks.get(2));
191         assertBackReference(5, 5, blocks.get(3));
192         assertBackReference(5, 3, blocks.get(4));
193         assertLiteralBlock("!", blocks.get(5));
194     }
195 
196     @Test
197     public void testBlaExampleWithShortPrefill() throws IOException {
198         final List<LZ77Compressor.Block> blocks = new ArrayList<>();
199         final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
200             // System.err.println(block);
201             if (block instanceof LZ77Compressor.LiteralBlock) {
202                 // replace with a real copy of data so tests
203                 // can see the results as they've been when
204                 // the callback has been called
205                 final LZ77Compressor.LiteralBlock b = (LZ77Compressor.LiteralBlock) block;
206                 final int len = b.getLength();
207                 block = new LZ77Compressor.LiteralBlock(Arrays.copyOfRange(b.getData(), b.getOffset(), b.getOffset() + len), 0, len);
208             }
209             blocks.add(block);
210         });
211         c.prefill(Arrays.copyOfRange(BLA, 0, 2));
212         c.compress(Arrays.copyOfRange(BLA, 2, BLA.length));
213         c.finish();
214         assertSize(4, blocks);
215         assertLiteralBlock("ah b", blocks.get(0));
216         assertBackReference(5, 18, blocks.get(1));
217         assertLiteralBlock("!", blocks.get(2));
218     }
219 
220     @Test
221     public void testBlaExampleWithSingleByteWrites() throws IOException {
222         final List<LZ77Compressor.Block> blocks = compress(newParameters(128), stagger(BLA));
223         assertEquals(9, blocks.size());
224         assertLiteralBlock("Blah b", blocks.get(0));
225         assertBackReference(5, 3, blocks.get(1));
226         assertBackReference(5, 3, blocks.get(2));
227         assertBackReference(5, 3, blocks.get(3));
228         assertBackReference(5, 3, blocks.get(4));
229         assertBackReference(5, 3, blocks.get(5));
230         assertBackReference(5, 3, blocks.get(6));
231         assertLiteralBlock("!", blocks.get(7));
232     }
233 
234     @Test
235     public void testCantPrefillAfterCompress() throws IOException {
236         final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
237         });
238         c.compress(Arrays.copyOfRange(BLA, 0, 2));
239         assertThrows(IllegalStateException.class, () -> c.prefill(Arrays.copyOfRange(BLA, 2, 4)));
240     }
241 
242     @Test
243     public void testCantPrefillTwice() {
244         final LZ77Compressor c = new LZ77Compressor(newParameters(128), block -> {
245         });
246         c.prefill(Arrays.copyOfRange(BLA, 0, 2));
247         assertThrows(IllegalStateException.class, () -> c.prefill(Arrays.copyOfRange(BLA, 2, 4)));
248     }
249 
250     @Test
251     public void testNonCompressableSentAsSingleBytes() throws IOException {
252         final List<LZ77Compressor.Block> blocks = compress(newParameters(8), stagger(ONE_TO_TEN));
253         assertSize(3, blocks);
254         assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0));
255         assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1));
256     }
257 
258     @Test
259     public void testNonCompressableWithLengthGreaterThanLiteralMaxButLessThanTwiceWindowSize() throws IOException {
260         final List<LZ77Compressor.Block> blocks = compress(newParameters(8), ONE_TO_TEN);
261         assertSize(3, blocks);
262         assertLiteralBlock(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, blocks.get(0));
263         assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(1));
264     }
265 
266     @Test
267     public void testNonCompressableWithLengthSmallerThanLiteralMax() throws IOException {
268         final List<LZ77Compressor.Block> blocks = compress(newParameters(128), ONE_TO_TEN);
269         assertSize(2, blocks);
270         assertLiteralBlock(ONE_TO_TEN, blocks.get(0));
271     }
272 
273     @Test
274     public void testNonCompressableWithLengthThatForcesWindowSlide() throws IOException {
275         final List<LZ77Compressor.Block> blocks = compress(newParameters(4), ONE_TO_TEN);
276         assertSize(4, blocks);
277         assertLiteralBlock(new byte[] { 1, 2, 3, 4, }, blocks.get(0));
278         assertLiteralBlock(new byte[] { 5, 6, 7, 8 }, blocks.get(1));
279         assertLiteralBlock(new byte[] { 9, 10 }, blocks.get(2));
280     }
281 
282     @Test
283     public void testSamIAmExampleWithFullArrayAvailableForCompression() throws IOException {
284         final List<LZ77Compressor.Block> blocks = compress(newParameters(1024), SAM);
285         assertEquals(21, blocks.size());
286         assertLiteralBlock("I am Sam\n\n", blocks.get(0));
287         assertBackReference(5, 3, blocks.get(1));
288         assertLiteralBlock(" ", blocks.get(2));
289         assertBackReference(14, 4, blocks.get(3));
290         assertLiteralBlock("\n\nThat", blocks.get(4));
291         assertBackReference(20, 4, blocks.get(5));
292         assertLiteralBlock("-I-am!", blocks.get(6));
293         assertBackReference(15, 16, blocks.get(7));
294         assertLiteralBlock("I do not like\nt", blocks.get(8));
295         assertBackReference(29, 14, blocks.get(9));
296         assertLiteralBlock("\nDo you", blocks.get(10));
297         assertBackReference(28, 5, blocks.get(11));
298         assertLiteralBlock(" green eggs and ham?\n", blocks.get(12));
299         assertBackReference(63, 14, blocks.get(13));
300         assertLiteralBlock(" them,", blocks.get(14));
301         assertBackReference(64, 9, blocks.get(15));
302         assertLiteralBlock(".", blocks.get(16));
303         assertBackReference(30, 15, blocks.get(17));
304         assertBackReference(65, 18, blocks.get(18));
305         assertLiteralBlock(".", blocks.get(19));
306     }
307 }