1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz4;
20
21 import java.io.IOException;
22 import java.io.OutputStream;
23 import java.util.Arrays;
24 import java.util.Deque;
25 import java.util.Iterator;
26 import java.util.LinkedList;
27
28 import org.apache.commons.compress.compressors.CompressorOutputStream;
29 import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
30 import org.apache.commons.compress.compressors.lz77support.Parameters;
31 import org.apache.commons.compress.utils.ByteUtils;
32
33
34
35
36
37
38
39
40 public class BlockLZ4CompressorOutputStream extends CompressorOutputStream {
41
42 static final class Pair {
43
44 private static int lengths(final int litLength, final int brLength) {
45 final int l = Math.min(litLength, 15);
46 final int br = brLength < 4 ? 0 : brLength < 19 ? brLength - 4 : 15;
47 return l << BlockLZ4CompressorInputStream.SIZE_BITS | br;
48 }
49
50 private static void writeLength(int length, final OutputStream out) throws IOException {
51 while (length >= 255) {
52 out.write(255);
53 length -= 255;
54 }
55 out.write(length);
56 }
57
58 private final Deque<byte[]> literals = new LinkedList<>();
59
60 private int literalLength;
61
62 private int brOffset, brLength;
63
64 private boolean written;
65
66 byte[] addLiteral(final LZ77Compressor.LiteralBlock block) {
67 final byte[] copy = Arrays.copyOfRange(block.getData(), block.getOffset(), block.getOffset() + block.getLength());
68 literals.add(copy);
69 literalLength += copy.length;
70 return copy;
71 }
72
73 private int backReferenceLength() {
74 return brLength;
75 }
76
77 boolean canBeWritten(final int lengthOfBlocksAfterThisPair) {
78 return hasBackReference() && lengthOfBlocksAfterThisPair >= MIN_OFFSET_OF_LAST_BACK_REFERENCE + MIN_BACK_REFERENCE_LENGTH;
79 }
80
81 boolean hasBackReference() {
82 return brOffset > 0;
83 }
84
85 private boolean hasBeenWritten() {
86 return written;
87 }
88
89 int length() {
90 return literalLength() + brLength;
91 }
92
93 private int literalLength() {
94
95 if (literalLength != 0) {
96 return literalLength;
97 }
98 int length = 0;
99 for (final byte[] b : literals) {
100 length += b.length;
101 }
102 return literalLength = length;
103 }
104
105 private void prependLiteral(final byte[] data) {
106 literals.addFirst(data);
107 literalLength += data.length;
108 }
109
110 private void prependTo(final Pair other) {
111 final Iterator<byte[]> listBackwards = literals.descendingIterator();
112 while (listBackwards.hasNext()) {
113 other.prependLiteral(listBackwards.next());
114 }
115 }
116
117 void setBackReference(final LZ77Compressor.BackReference block) {
118 if (hasBackReference()) {
119 throw new IllegalStateException();
120 }
121 brOffset = block.getOffset();
122 brLength = block.getLength();
123 }
124
125 private Pair splitWithNewBackReferenceLengthOf(final int newBackReferenceLength) {
126 final Pair p = new Pair();
127 p.literals.addAll(literals);
128 p.brOffset = brOffset;
129 p.brLength = newBackReferenceLength;
130 return p;
131 }
132
133 void writeTo(final OutputStream out) throws IOException {
134 final int litLength = literalLength();
135 out.write(lengths(litLength, brLength));
136 if (litLength >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
137 writeLength(litLength - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
138 }
139 for (final byte[] b : literals) {
140 out.write(b);
141 }
142 if (hasBackReference()) {
143 ByteUtils.toLittleEndian(out, brOffset, 2);
144 if (brLength - MIN_BACK_REFERENCE_LENGTH >= BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK) {
145 writeLength(brLength - MIN_BACK_REFERENCE_LENGTH - BlockLZ4CompressorInputStream.BACK_REFERENCE_SIZE_MASK, out);
146 }
147 }
148 written = true;
149 }
150 }
151
152 private static final int MIN_BACK_REFERENCE_LENGTH = 4;
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173 private static final int MIN_OFFSET_OF_LAST_BACK_REFERENCE = 12;
174
175
176
177
178
179
180 public static Parameters.Builder createParameterBuilder() {
181 final int maxLen = BlockLZ4CompressorInputStream.WINDOW_SIZE - 1;
182 return Parameters.builder(BlockLZ4CompressorInputStream.WINDOW_SIZE).withMinBackReferenceLength(MIN_BACK_REFERENCE_LENGTH)
183 .withMaxBackReferenceLength(maxLen).withMaxOffset(maxLen).withMaxLiteralLength(maxLen);
184 }
185
186 private final LZ77Compressor compressor;
187
188 private final OutputStream os;
189
190
191 private final byte[] oneByte = new byte[1];
192 private boolean finished;
193
194 private final Deque<Pair> pairs = new LinkedList<>();
195
196
197
198 private final Deque<byte[]> expandedBlocks = new LinkedList<>();
199
200
201
202
203
204
205 public BlockLZ4CompressorOutputStream(final OutputStream os) {
206 this(os, createParameterBuilder().build());
207 }
208
209
210
211
212
213
214
215 public BlockLZ4CompressorOutputStream(final OutputStream os, final Parameters params) {
216 this.os = os;
217 compressor = new LZ77Compressor(params, block -> {
218 switch (block.getType()) {
219 case LITERAL:
220 addLiteralBlock((LZ77Compressor.LiteralBlock) block);
221 break;
222 case BACK_REFERENCE:
223 addBackReference((LZ77Compressor.BackReference) block);
224 break;
225 case EOD:
226 writeFinalLiteralBlock();
227 break;
228 }
229 });
230 }
231
232 private void addBackReference(final LZ77Compressor.BackReference block) throws IOException {
233 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
234 last.setBackReference(block);
235 recordBackReference(block);
236 clearUnusedBlocksAndPairs();
237 }
238
239 private void addLiteralBlock(final LZ77Compressor.LiteralBlock block) throws IOException {
240 final Pair last = writeBlocksAndReturnUnfinishedPair(block.getLength());
241 recordLiteral(last.addLiteral(block));
242 clearUnusedBlocksAndPairs();
243 }
244
245 private void clearUnusedBlocks() {
246 int blockLengths = 0;
247 int blocksToKeep = 0;
248 for (final byte[] b : expandedBlocks) {
249 blocksToKeep++;
250 blockLengths += b.length;
251 if (blockLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
252 break;
253 }
254 }
255 final int size = expandedBlocks.size();
256 for (int i = blocksToKeep; i < size; i++) {
257 expandedBlocks.removeLast();
258 }
259 }
260
261 private void clearUnusedBlocksAndPairs() {
262 clearUnusedBlocks();
263 clearUnusedPairs();
264 }
265
266 private void clearUnusedPairs() {
267 int pairLengths = 0;
268 int pairsToKeep = 0;
269 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
270 final Pair p = it.next();
271 pairsToKeep++;
272 pairLengths += p.length();
273 if (pairLengths >= BlockLZ4CompressorInputStream.WINDOW_SIZE) {
274 break;
275 }
276 }
277 final int size = pairs.size();
278 for (int i = pairsToKeep; i < size; i++) {
279 final Pair p = pairs.peekFirst();
280 if (!p.hasBeenWritten()) {
281 break;
282 }
283 pairs.removeFirst();
284 }
285 }
286
287 @Override
288 public void close() throws IOException {
289 try {
290 finish();
291 } finally {
292 os.close();
293 }
294 }
295
296 private byte[] expand(final int offset, final int length) {
297 final byte[] expanded = new byte[length];
298 if (offset == 1) {
299 final byte[] block = expandedBlocks.peekFirst();
300 final byte b = block[block.length - 1];
301 if (b != 0) {
302 Arrays.fill(expanded, b);
303 }
304 } else {
305 expandFromList(expanded, offset, length);
306 }
307 return expanded;
308 }
309
310 private void expandFromList(final byte[] expanded, final int offset, final int length) {
311 int offsetRemaining = offset;
312 int lengthRemaining = length;
313 int writeOffset = 0;
314 while (lengthRemaining > 0) {
315
316 byte[] block = null;
317 final int copyLen;
318 final int copyOffset;
319 if (offsetRemaining > 0) {
320 int blockOffset = 0;
321 for (final byte[] b : expandedBlocks) {
322 if (b.length + blockOffset >= offsetRemaining) {
323 block = b;
324 break;
325 }
326 blockOffset += b.length;
327 }
328 if (block == null) {
329
330 throw new IllegalStateException("Failed to find a block containing offset " + offset);
331 }
332 copyOffset = blockOffset + block.length - offsetRemaining;
333 copyLen = Math.min(lengthRemaining, block.length - copyOffset);
334 } else {
335
336 block = expanded;
337 copyOffset = -offsetRemaining;
338 copyLen = Math.min(lengthRemaining, writeOffset + offsetRemaining);
339 }
340 System.arraycopy(block, copyOffset, expanded, writeOffset, copyLen);
341 offsetRemaining -= copyLen;
342 lengthRemaining -= copyLen;
343 writeOffset += copyLen;
344 }
345 }
346
347
348
349
350
351
352 public void finish() throws IOException {
353 if (!finished) {
354 compressor.finish();
355 finished = true;
356 }
357 }
358
359
360
361
362
363
364
365
366
367
368 public void prefill(final byte[] data, final int off, final int len) {
369 if (len > 0) {
370 final byte[] b = Arrays.copyOfRange(data, off, off + len);
371 compressor.prefill(b);
372 recordLiteral(b);
373 }
374 }
375
376 private void recordBackReference(final LZ77Compressor.BackReference block) {
377 expandedBlocks.addFirst(expand(block.getOffset(), block.getLength()));
378 }
379
380 private void recordLiteral(final byte[] b) {
381 expandedBlocks.addFirst(b);
382 }
383
384 private void rewriteLastPairs() {
385 final LinkedList<Pair> lastPairs = new LinkedList<>();
386 final LinkedList<Integer> pairLength = new LinkedList<>();
387 int offset = 0;
388 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
389 final Pair p = it.next();
390 if (p.hasBeenWritten()) {
391 break;
392 }
393 final int len = p.length();
394 pairLength.addFirst(len);
395 lastPairs.addFirst(p);
396 offset += len;
397 if (offset >= MIN_OFFSET_OF_LAST_BACK_REFERENCE) {
398 break;
399 }
400 }
401 lastPairs.forEach(pairs::remove);
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426 final int lastPairsSize = lastPairs.size();
427 int toExpand = 0;
428 for (int i = 1; i < lastPairsSize; i++) {
429 toExpand += pairLength.get(i);
430 }
431 final Pair replacement = new Pair();
432 if (toExpand > 0) {
433 replacement.prependLiteral(expand(toExpand, toExpand));
434 }
435 final Pair splitCandidate = lastPairs.get(0);
436 final int stillNeeded = MIN_OFFSET_OF_LAST_BACK_REFERENCE - toExpand;
437 final int brLen = splitCandidate.hasBackReference() ? splitCandidate.backReferenceLength() : 0;
438 if (splitCandidate.hasBackReference() && brLen >= MIN_BACK_REFERENCE_LENGTH + stillNeeded) {
439 replacement.prependLiteral(expand(toExpand + stillNeeded, stillNeeded));
440 pairs.add(splitCandidate.splitWithNewBackReferenceLengthOf(brLen - stillNeeded));
441 } else {
442 if (splitCandidate.hasBackReference()) {
443 replacement.prependLiteral(expand(toExpand + brLen, brLen));
444 }
445 splitCandidate.prependTo(replacement);
446 }
447 pairs.add(replacement);
448 }
449
450 @Override
451 public void write(final byte[] data, final int off, final int len) throws IOException {
452 compressor.compress(data, off, len);
453 }
454
455 @Override
456 public void write(final int b) throws IOException {
457 oneByte[0] = (byte) (b & 0xff);
458 write(oneByte);
459 }
460
461 private Pair writeBlocksAndReturnUnfinishedPair(final int length) throws IOException {
462 writeWritablePairs(length);
463 Pair last = pairs.peekLast();
464 if (last == null || last.hasBackReference()) {
465 last = new Pair();
466 pairs.addLast(last);
467 }
468 return last;
469 }
470
471 private void writeFinalLiteralBlock() throws IOException {
472 rewriteLastPairs();
473 for (final Pair p : pairs) {
474 if (!p.hasBeenWritten()) {
475 p.writeTo(os);
476 }
477 }
478 pairs.clear();
479 }
480
481 private void writeWritablePairs(final int lengthOfBlocksAfterLastPair) throws IOException {
482 int unwrittenLength = lengthOfBlocksAfterLastPair;
483 for (final Iterator<Pair> it = pairs.descendingIterator(); it.hasNext();) {
484 final Pair p = it.next();
485 if (p.hasBeenWritten()) {
486 break;
487 }
488 unwrittenLength += p.length();
489 }
490 for (final Pair p : pairs) {
491 if (p.hasBeenWritten()) {
492 continue;
493 }
494 unwrittenLength -= p.length();
495 if (!p.canBeWritten(unwrittenLength)) {
496 break;
497 }
498 p.writeTo(os);
499 }
500 }
501 }