1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz4;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.Arrays;
24 import java.util.Deque;
25 import java.util.Iterator;
26 import java.util.LinkedList;
27
28 import org.apache.commons.compress.compressors.CompressorOutputStream;
29 import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
30 import org.apache.commons.compress.compressors.lz77support.Parameters;
31 import org.apache.commons.compress.utils.ByteUtils;
32
33
34
35
36
37
38
39
40 public class BlockLZ4CompressorOutputStream extends CompressorOutputStream<OutputStream> {
41
42 static final class Pair {
43
44 private static int lengths(final int litLength, final int brLength) {
45 final int l = Math.min(litLength, 15);
46 final int br = brLength < 4 ? 0 : brLength < 19 ? brLength - 4 : 15;
47 return l << BlockLZ4CompressorInputStream.SIZE_BITS | br;
48 }
49
50 private static void writeLength(int length, final OutputStream out) throws IOException {
51 while (length >= 255) {
52 out.write(255);
53 length -= 255;
54 }
55 out.write(length);
56 }
57
58 private final Deque<byte[]> literals = new LinkedList<>();
59
60 private int literalLength;
61
62 private int brOffset;
63
64 private int brLength;
65
66 private boolean written;
67
68 byte[] addLiteral(final LZ77Compressor.LiteralBlock block) {
69 final byte[] copy = Arrays.copyOfRange(block.getData(), block.getOffset(), block.getOffset() + block.getLength());
70 literals.add(copy);
71 literalLength += copy.length;
72 return copy;
73 }
74
75 private int backReferenceLength() {
76 return brLength;
77 }
78
79 boolean canBeWritten(final int lengthOfBlocksAfterThisPair) {
80 return hasBackReference() && lengthOfBlocksAfterThisPair >= MIN_OFFSET_OF_LAST_BACK_REFERENCE + MIN_BACK_REFERENCE_LENGTH;
81 }
82
83 boolean hasBackReference() {
84 return brOffset > 0;
85 }
86
87 private boolean hasBeenWritten() {
88 return written;
89 }
90
91 int length() {
92 return literalLength() + brLength;
93 }
94
95 private int literalLength() {
96
97 if (literalLength != 0) {
98 return literalLength;
99 }
100 int length = 0;
101 for (final byte[] b : literals) {
102 length += b.length;
103 }
104 return literalLength = length;
105 }
106
107 private void prependLiteral(final byte[] data) {
108 literals.addFirst(data);
109 literalLength += data.length;
110 }
111
112 private void prependTo(final Pair other) {
113 final Iterator<byte[]> listBackwards = literals.descendingIterator();
114 while (listBackwards.hasNext()) {
115 other.prependLiteral(listBackwards.next());
116 }
117 }
118
119 void setBackReference(final LZ77Compressor.BackReference block) {
120 if (hasBackReference()) {
121 throw new IllegalStateException();
122 }
123 brOffset = block.getOffset();
124 brLength = block.getLength();
125 }
126
127 private Pair splitWithNewBackReferenceLengthOf(final int newBackReferenceLength) {
128 final Pair p = new Pair();
129 p.literals.addAll(literals);
130 p.brOffset = brOffset;
131 p.brLength = newBackReferenceLength;
132 return p;
133 }
134
135 void writeTo(final OutputStream out) throws IOException {
136 final int litLength = literalLength();
137 out.write(lengths(litLength, brLength));
138 if (litLength >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
139 writeLength(litLength - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
140 }
141 for (final byte[] b : literals) {
142 out.write(b);
143 }
144 if (hasBackReference()) {
145 ByteUtils.toLittleEndian(out, brOffset, 2);
146 if (brLength - MIN_BACK_REFERENCE_LENGTH >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
147 writeLength(brLength - MIN_BACK_REFERENCE_LENGTH - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
148 }
149 }
150 written = true;
151 }
152 }
153
154 private static final int MIN_BACK_REFERENCE_LENGTH = 4;
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175 private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12;
176
177
178
179
180
181
182 public static Parameters.Builder createParameterBuilder() {
183 final int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1;
184 return Parameters.builder(BlockLZ4CompressorInputStream.WINDOW_SIZE).withMinBackReferenceLength(MIN_BACK_REFERENCE_LENGTH)
185 .withMaxBackReferenceLength(maxLen).withMaxOffset(maxLen).withMaxLiteralLength(maxLen);
186 }
187
188 private final LZ77Compressor compressor;
189
190
191 private final byte[] oneByte = new byte[1];
192 private final Deque<Pair> pairs = new LinkedList<>();
193
194
195
196 private final Deque<byte[]> expandedBlocks = new LinkedList<>();
197
198
199
200
201
202
203 public BlockLZ4CompressorOutputStream(final OutputStream out) {
204 this(out, createParameterBuilder().build());
205 }
206
207
208
209
210
211
212
213 public BlockLZ4CompressorOutputStream(final OutputStream out, final Parameters params) {
214 super(out);
215 compressor = new LZ77Compressor(params, block -> {
216 switch (block.getType()) {
217 case LITERAL:
218 addLiteralBlock((LZ77Compressor.LiteralBlock) block);
219 break;
220 case BACK_REFERENCE:
221 addBackReference((LZ77Compressor.BackReference) block);
222 break;
223 case EOD:
224 writeFinalLiteralBlock();
225 break;
226 }
227 });
228 }
229
230 private void addBackReference(final LZ77Compressor.BackReference block) throws IOException {
231 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
232 last.setBackReference(block);
233 recordBackReference(block);
234 clearUnusedBlocksAndPairs();
235 }
236
237 private void addLiteralBlock(final LZ77Compressor.LiteralBlock block) throws IOException {
238 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
239 recordLiteral(last.addLiteral(block));
240 clearUnusedBlocksAndPairs();
241 }
242
243 private void clearUnusedBlocks() {
244 int blockLengths = 0;
245 int blocksToKeep = 0;
246 for (final byte[] b : expandedBlocks) {
247 blocksToKeep++;
248 blockLengths += b.length;
249 if (blockLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
250 break;
251 }
252 }
253 final int size = expandedBlocks.size();
254 for (int i = blocksToKeep; i < size; i++) {
255 expandedBlocks.removeLast();
256 }
257 }
258
259 private void clearUnusedBlocksAndPairs() {
260 clearUnusedBlocks();
261 clearUnusedPairs();
262 }
263
264 private void clearUnusedPairs() {
265 int pairLengths = 0;
266 int pairsToKeep = 0;
267 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
268 final Pair p = it.next();
269 pairsToKeep++;
270 pairLengths += p.length();
271 if (pairLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
272 break;
273 }
274 }
275 final int size = pairs.size();
276 for (int i = pairsToKeep; i < size; i++) {
277 final Pair p = pairs.peekFirst();
278 if (!p.hasBeenWritten()) {
279 break;
280 }
281 pairs.removeFirst();
282 }
283 }
284
285 @Override
286 public void close() throws IOException {
287 try {
288 finish();
289 } finally {
290 super.close();
291 }
292 }
293
294 private byte[] expand(final int offset, final int length) {
295 final byte[] expanded = new byte[length];
296 if (offset == 1) {
297 final byte[] block = expandedBlocks.peekFirst();
298 final byte b = block[block.length - 1];
299 if (b != 0) {
300 Arrays.fill(expanded, b);
301 }
302 } else {
303 expandFromList(expanded, offset, length);
304 }
305 return expanded;
306 }
307
308 private void expandFromList(final byte[] expanded, final int offset, final int length) {
309 int offsetRemaining = offset;
310 int lengthRemaining = length;
311 int writeOffset = 0;
312 while (lengthRemaining > 0) {
313
314 byte[] block = null;
315 final int copyLen;
316 final int copyOffset;
317 if (offsetRemaining > 0) {
318 int blockOffset = 0;
319 for (final byte[] b : expandedBlocks) {
320 if (b.length + blockOffset >= offsetRemaining) {
321 block = b;
322 break;
323 }
324 blockOffset += b.length;
325 }
326 if (block == null) {
327
328 throw new IllegalStateException("Failed to find a block containing offset " + offset);
329 }
330 copyOffset = blockOffset + block.length - offsetRemaining;
331 copyLen = Math.min(lengthRemaining, block.length - copyOffset);
332 } else {
333
334 block = expanded;
335 copyOffset = -offsetRemaining;
336 copyLen = Math.min(lengthRemaining, writeOffset + offsetRemaining);
337 }
338 System.arraycopy(block, copyOffset, expanded, writeOffset, copyLen);
339 offsetRemaining -= copyLen;
340 lengthRemaining -= copyLen;
341 writeOffset += copyLen;
342 }
343 }
344
345
346
347
348
349
350 @Override
351 public void finish() throws IOException {
352 if (!isFinished()) {
353 compressor.finish();
354 super.finish();
355 }
356 }
357
358
359
360
361
362
363
364
365
366
367 public void prefill(final byte[] data, final int off, final int len) {
368 if (len > 0) {
369 final byte[] b = Arrays.copyOfRange(data, off, off + len);
370 compressor.prefill(b);
371 recordLiteral(b);
372 }
373 }
374
375 private void recordBackReference(final LZ77Compressor.BackReference block) {
376 expandedBlocks.addFirst(expand(block.getOffset(), block.getLength()));
377 }
378
379 private void recordLiteral(final byte[] b) {
380 expandedBlocks.addFirst(b);
381 }
382
383 private void rewriteLastPairs() {
384 final LinkedList<Pair> lastPairs = new LinkedList<>();
385 final LinkedList<Integer> pairLength = new LinkedList<>();
386 int offset = 0;
387 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
388 final Pair p = it.next();
389 if (p.hasBeenWritten()) {
390 break;
391 }
392 final int len = p.length();
393 pairLength.addFirst(len);
394 lastPairs.addFirst(p);
395 offset += len;
396 if (offset >= MIN_OFFSET_OF_LAST_BACK_REFERENCE) {
397 break;
398 }
399 }
400 lastPairs.forEach(pairs::remove);
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425 final int lastPairsSize = lastPairs.size();
426 int toExpand = 0;
427 for (int i = 1; i < lastPairsSize; i++) {
428 toExpand += pairLength.get(i);
429 }
430 final Pair replacement = new Pair();
431 if (toExpand > 0) {
432 replacement.prependLiteral(expand(toExpand, toExpand));
433 }
434 final Pair splitCandidate = lastPairs.get(0);
435 final int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand;
436 final int brLen = splitCandidate.hasBackReference() ? splitCandidate.backReferenceLength() : 0;
437 if (splitCandidate.hasBackReference() && brLen >= MIN_BACK_REFERENCE_LENGTH + stillNeeded) {
438 replacement.prependLiteral(expand(toExpand + stillNeeded, stillNeeded));
439 pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(brLen - stillNeeded));
440 } else {
441 if (splitCandidate.hasBackReference()) {
442 replacement.prependLiteral(expand(toExpand + brLen, brLen));
443 }
444 splitCandidate.prependTo(replacement);
445 }
446 pairs.add(replacement);
447 }
448
449 @Override
450 public void write(final byte[] data, final int off, final int len) throws IOException {
451 compressor.compress(data, off, len);
452 }
453
454 @Override
455 public void write(final int b) throws IOException {
456 oneByte[0] = (byte) (b & 0xff);
457 write(oneByte);
458 }
459
460 private Pair writeBlocksAndReturnUnfinishedPair(final int length) throws IOException {
461 writeWritablePairs(length);
462 Pair last = pairs.peekLast();
463 if (last == null || last.hasBackReference()) {
464 last = new Pair();
465 pairs.addLast(last);
466 }
467 return last;
468 }
469
470 private void writeFinalLiteralBlock() throws IOException {
471 rewriteLastPairs();
472 for (final Pair p : pairs) {
473 if (!p.hasBeenWritten()) {
474 p.writeTo(out);
475 }
476 }
477 pairs.clear();
478 }
479
480 private void writeWritablePairs(final int lengthOfBlocksAfterLastPair) throws IOException {
481 int unwrittenLength = lengthOfBlocksAfterLastPair;
482 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
483 final Pair p = it.next();
484 if (p.hasBeenWritten()) {
485 break;
486 }
487 unwrittenLength += p.length();
488 }
489 for (final Pair p : pairs) {
490 if (p.hasBeenWritten()) {
491 continue;
492 }
493 unwrittenLength -= p.length();
494 if (!p.canBeWritten(unwrittenLength)) {
495 break;
496 }
497 p.writeTo(out);
498 }
499 }
500 }