View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lz4;
20  
21  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
22  import static org.junit.jupiter.api.Assertions.assertEquals;
23  import static org.junit.jupiter.api.Assertions.assertFalse;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.IOException;
28  import java.util.Arrays;
29  
30  import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
31  import org.junit.jupiter.api.Disabled;
32  import org.junit.jupiter.api.Test;
33  
34  public class BlockLZ4CompressorOutputStreamTest {
35  
36      @Test
37      @Disabled("would pass if the algorithm used for rewriting the final pairs was smarter")
38      public void canWriteBackReferenceFollowedByShortLiteralIfLengthIsBigEnough() {
39          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
40          p.setBackReference(new LZ77Compressor.BackReference(1, 10));
41          assertTrue(p.canBeWritten(5));
42      }
43  
44      @Test
45      @Disabled("would pass if the algorithm used for rewriting the final pairs was smarter")
46      public void canWriteBackReferenceFollowedByShortLiteralIfOffsetIsBigEnough() {
47          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
48          p.setBackReference(new LZ77Compressor.BackReference(10, 4));
49          assertTrue(p.canBeWritten(5));
50      }
51  
52      private byte[] compress(final byte[] input, final int... lengthOfTrailers) throws IOException {
53          try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
54                  BlockLZ4CompressorOutputStream lo = new BlockLZ4CompressorOutputStream(baos)) {
55              lo.write(input);
56              for (int i = 0; i < lengthOfTrailers.length; i++) {
57                  final int lengthOfTrailer = lengthOfTrailers[i];
58                  for (int j = 0; j < lengthOfTrailer; j++) {
59                      lo.write(i + 1);
60                  }
61              }
62              lo.close();
63              return baos.toByteArray();
64          }
65      }
66  
67      private byte[] compress(final int length) throws IOException {
68          return compress(length, 0);
69      }
70  
71      private byte[] compress(final int lengthBeforeTrailer, final int... lengthOfTrailers) throws IOException {
72          final byte[] b = prepareExpected(lengthBeforeTrailer);
73          return compress(b, lengthOfTrailers);
74      }
75  
76      private byte[] prepareExpected(final int length) {
77          final byte[] b = new byte[length];
78          Arrays.fill(b, (byte) -1);
79          return b;
80      }
81  
82      @Test
83      public void testCantWriteBackReferenceFollowedByLiteralThatIsTooShort() {
84          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
85          p.setBackReference(new LZ77Compressor.BackReference(10, 14));
86          assertFalse(p.canBeWritten(4));
87      }
88  
89      @Test
90      public void testCantWriteBackReferenceIfAccumulatedOffsetIsTooShort() {
91          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
92          p.setBackReference(new LZ77Compressor.BackReference(1, 4));
93          assertFalse(p.canBeWritten(5));
94      }
95  
96      @Test
97      public void testCanWriteBackReferenceFollowedByLongLiteral() {
98          final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
99          p.setBackReference(new LZ77Compressor.BackReference(1, 4));
100         // a length of 11 would be enough according to the spec, but
101         // the algorithm we use for rewriting the last block requires
102         // 16 bytes
103         assertTrue(p.canBeWritten(16));
104     }
105 
106     @Test
107     public void testCanWritePairWithoutBackReference() throws IOException {
108         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
109         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
110         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
111         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
112         p.writeTo(bos);
113         assertArrayEquals(new byte[] { 4 << 4, 2, 3, 4, 5 }, bos.toByteArray());
114     }
115 
116     @Test
117     public void testCanWritePairWithoutLiterals() throws IOException {
118         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
119         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
120         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
121         p.writeTo(bos);
122         assertArrayEquals(new byte[] { 0, 1, 0 }, bos.toByteArray());
123     }
124 
125     @Test
126     public void testPairAccumulatesLengths() {
127         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
128         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
129         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
130         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
131         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 2, 5));
132         assertEquals(13, p.length());
133     }
134 
135     @Test
136     public void testPairSeesBackReferenceWhenSet() {
137         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
138         assertFalse(p.hasBackReference());
139         p.setBackReference(new LZ77Compressor.BackReference(1, 4));
140         assertTrue(p.hasBackReference());
141     }
142 
143     @Test
144     public void testRewritingOfFinalBlockWithoutTrailingLZ77Literals() throws IOException {
145         for (int i = 1; i < 13; i++) {
146             // according to the spec these are all too short be compressed
147             // LZ77Compressor will create a single byte literal
148             // followed by a back-reference starting with i = 5,
149             // though. (4 is the minimum length for a back-reference
150             // in LZ4
151             final byte[] compressed = compress(i);
152             final byte[] expected = prepareExpected(i + 1);
153             expected[0] = (byte) (i << 4);
154             assertArrayEquals(expected, compressed, "input length is " + i);
155         }
156 
157         for (int i = 13; i < 17; i++) {
158             // LZ77Compressor will still create a single byte literal
159             // followed by a back-reference
160             // according to the spec the back-reference could be split
161             // as we can cut out a five byte literal and the offset
162             // would be big enough, but our algorithm insists on a
163             // twelve byte literal trailer and the back-reference
164             // would fall below the minimal size
165             final byte[] compressed = compress(i);
166             final byte[] expected = prepareExpected(i < 15 ? i + 1 : i + 2);
167             if (i < 15) {
168                 expected[0] = (byte) (i << 4);
169             } else {
170                 expected[0] = (byte) (15 << 4);
171                 expected[1] = (byte) (i - 15);
172             }
173             assertArrayEquals(expected, compressed, "input length is " + i);
174         }
175 
176         for (int i = 17; i < 20; i++) {
177             // LZ77Compressor will still create a single byte literal
178             // followed by a back-reference
179             // this time even our algorithm is willing to break up the
180             // back-reference
181             final byte[] compressed = compress(i);
182             final byte[] expected = prepareExpected(17);
183             expected[0] = (byte) (1 << 4 | i - 17);
184             // two-byte offset
185             expected[2] = 1;
186             expected[3] = 0;
187             expected[4] = (byte) (12 << 4);
188             assertArrayEquals(expected, compressed, "input length is " + i);
189         }
190     }
191 
192     @Test
193     public void testRewritingOfFinalBlockWithTrailingLZ77Literals() throws IOException {
194         for (int i = 1; i < 5; i++) {
195             // LZ77Compressor will create a single byte literal
196             // followed by a back-reference of length 15 followed by a
197             // literal of length i
198             // we can split the back-reference and merge it with the literal
199             final byte[] compressed = compress(16, i);
200             final byte[] expected = prepareExpected(17);
201             expected[0] = (byte) (1 << 4 | i - 1);
202             // two-byte offset
203             expected[2] = 1;
204             expected[3] = 0;
205             expected[4] = (byte) (12 << 4);
206             for (int j = 0; j < i; j++) {
207                 expected[expected.length - 1 - j] = 1;
208             }
209             assertArrayEquals(expected, compressed, "trailer length is " + i);
210         }
211         for (int i = 5; i < 12; i++) {
212             // LZ77Compressor will create a single byte literal
213             // followed by a back-reference of length 15 followed by
214             // another single byte literal and another back-reference
215             // of length i-1
216             // according to the spec we could completely satisfy the
217             // requirements by just rewriting the last Pair, but our
218             // algorithm will chip off a few bytes from the first Pair
219             final byte[] compressed = compress(16, i);
220             final byte[] expected = prepareExpected(17);
221             expected[0] = (byte) (1 << 4 | i - 1);
222             // two-byte offset
223             expected[2] = 1;
224             expected[3] = 0;
225             expected[4] = (byte) (12 << 4);
226             for (int j = 0; j < i; j++) {
227                 expected[expected.length - 1 - j] = 1;
228             }
229             assertArrayEquals(expected, compressed, "trailer length is " + i);
230         }
231         for (int i = 12; i < 15; i++) {
232             // LZ77Compressor will create a single byte literal
233             // followed by a back-reference of length 15 followed by
234             // another single byte literal and another back-reference
235             // of length i-1
236             // this shouldn't affect the first pair at all as
237             // rewriting the second one is sufficient
238             final byte[] compressed = compress(16, i);
239             final byte[] expected = prepareExpected(i + 5);
240             expected[0] = (byte) (1 << 4 | 11);
241             // two-byte offset
242             expected[2] = 1;
243             expected[3] = 0;
244             expected[4] = (byte) (i << 4);
245             for (int j = 0; j < i; j++) {
246                 expected[expected.length - 1 - j] = 1;
247             }
248             assertArrayEquals(expected, compressed, "trailer length is " + i);
249         }
250     }
251 
252     @Test
253     public void testRewritingOfFourPairs() throws IOException {
254         // LZ77Compressor creates three times a literal block followed
255         // by a back-reference (once 5 bytes long and twice four bytes
256         // long and a final literal block of length 1
257         // in the result the three last pairs are merged into a single
258         // literal and one byte is chopped off of the first pair's
259         // back-reference
260         final byte[] compressed = compress(6, 5, 5, 1);
261         final byte[] expected = prepareExpected(17);
262         expected[0] = (byte) (1 << 4);
263         // two-byte offset
264         expected[2] = 1;
265         expected[3] = 0;
266         expected[4] = (byte) (12 << 4);
267         for (int i = 6; i < 11; i++) {
268             expected[i] = 1;
269         }
270         for (int i = 11; i < 16; i++) {
271             expected[i] = 2;
272         }
273         expected[16] = 3;
274         assertArrayEquals(expected, compressed);
275     }
276 
277     @Test
278     public void testRewritingWithFinalBackreferenceAndOffsetBiggerThan1() throws IOException {
279         // this caused trouble when expandFromList() fell into the "offsetRemaining is negative" self-copy case as the
280         // calculation of copyOffset was wrong
281         final byte[] toCompress = prepareExpected(25);
282         for (int i = 0; i < toCompress.length; i += 4) {
283             toCompress[i] = 1;
284         }
285         // LZ77Compressor creates a four byte literal and a back-reference with offset 4 and length 21
286         // we'll need to split the back-reference and chop off the last 12 bytes
287         final byte[] compressed = compress(toCompress);
288         final byte[] expected = prepareExpected(1 + 4 + 2 + 1 + 12);
289         expected[0] = (byte) (4 << 4 | 5);
290         expected[1] = 1;
291         expected[5] = 4;
292         expected[6] = 0;
293         expected[7] = (byte) (12 << 4);
294         for (int i = 11; i < expected.length; i += 4) {
295             expected[i] = 1;
296         }
297         assertArrayEquals(expected, compressed);
298     }
299 
300     @Test
301     public void testWritesCompletePair() throws IOException {
302         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
303         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
304         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 1, 4));
305         b[2] = 19;
306         p.setBackReference(new LZ77Compressor.BackReference(1, 5));
307         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
308         p.writeTo(bos);
309         assertArrayEquals(new byte[] { (4 << 4) + 1, 2, 3, 4, 5, 1, 0 }, bos.toByteArray());
310     }
311 
312     @Test
313     public void testWritesCorrectSizeFor15ByteLengthLiteral() throws IOException {
314         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
315         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
316         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
317         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 6));
318         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
319         p.writeTo(bos);
320         assertArrayEquals(new byte[] { (byte) (15 << 4), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6 }, bos.toByteArray());
321     }
322 
323     @Test
324     public void testWritesCorrectSizeFor19ByteLengthBackReference() throws IOException {
325         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
326         p.setBackReference(new LZ77Compressor.BackReference(1, 19));
327         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
328         p.writeTo(bos);
329         assertArrayEquals(new byte[] { 15, 1, 0, 0 }, bos.toByteArray());
330     }
331 
332     @Test
333     public void testWritesCorrectSizeFor269ByteLengthLiteral() throws IOException {
334         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
335         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
336         for (int i = 0; i < 26; i++) {
337             p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
338         }
339         p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 9));
340         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
341         p.writeTo(bos);
342         assertArrayEquals(new byte[] { (byte) (15 << 4), (byte) 254, 1 }, Arrays.copyOfRange(bos.toByteArray(), 0, 3));
343     }
344 
345     @Test
346     public void testWritesCorrectSizeFor270ByteLengthLiteral() throws IOException {
347         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
348         final byte[] b = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
349         for (int i = 0; i < 27; i++) {
350             p.addLiteral(new LZ77Compressor.LiteralBlock(b, 0, 10));
351         }
352         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
353         p.writeTo(bos);
354         assertArrayEquals(new byte[] { (byte) (15 << 4), (byte) 255, 0, 1 }, Arrays.copyOfRange(bos.toByteArray(), 0, 4));
355     }
356 
357     @Test
358     public void testWritesCorrectSizeFor273ByteLengthBackReference() throws IOException {
359         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
360         p.setBackReference(new LZ77Compressor.BackReference(1, 273));
361         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
362         p.writeTo(bos);
363         assertArrayEquals(new byte[] { 15, 1, 0, (byte) 254 }, bos.toByteArray());
364     }
365 
366     @Test
367     public void testWritesCorrectSizeFor274ByteLengthBackReference() throws IOException {
368         final BlockLZ4CompressorOutputStream.Pair p = new BlockLZ4CompressorOutputStream.Pair();
369         p.setBackReference(new LZ77Compressor.BackReference(1, 274));
370         final ByteArrayOutputStream bos = new ByteArrayOutputStream();
371         p.writeTo(bos);
372         assertArrayEquals(new byte[] { 15, 1, 0, (byte) 255, 0 }, bos.toByteArray());
373     }
374 }