1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz77support;
20
21 import java.io.IOException;
22 import java.util.Objects;
23
24 import org.apache.commons.lang3.ArrayFill;
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79 public class LZ77Compressor {
80
81
82
83
84 public abstract static class AbstractReference extends Block {
85
86 private final int offset;
87 private final int length;
88
89
90
91
92
93
94
95
96 public AbstractReference(final BlockType blockType, final int offset, final int length) {
97 super(blockType);
98 this.offset = offset;
99 this.length = length;
100 }
101
102
103
104
105
106
107 public int getLength() {
108 return length;
109 }
110
111
112
113
114
115
116 public int getOffset() {
117 return offset;
118 }
119
120 @Override
121 public String toString() {
122 return super.toString() + " with offset " + offset + " and length " + length;
123 }
124 }
125
126
127
128
129 public static final class BackReference extends AbstractReference {
130
131
132
133
134
135
136
137 public BackReference(final int offset, final int length) {
138 super(BlockType.BACK_REFERENCE, offset, length);
139 }
140
141 }
142
143
144
145
146
147
148
149
150
151 public abstract static class Block {
152
153
154
155
156 public enum BlockType {
157
158
159
160
161 LITERAL,
162
163
164
165
166 BACK_REFERENCE,
167
168
169
170
171 EOD
172 }
173
174 private final BlockType type;
175
176
177
178
179
180
181 @Deprecated
182 public Block() {
183 this.type = null;
184 }
185
186
187
188
189
190
191 protected Block(final BlockType type) {
192 this.type = Objects.requireNonNull(type);
193 }
194
195
196
197
198
199
200 public BlockType getType() {
201 return type;
202 }
203
204 @Override
205 public String toString() {
206 return getClass().getSimpleName() + " " + getType();
207 }
208 }
209
210
211
212
213
214
215
216
217
218 public interface Callback {
219
220
221
222
223
224
225
226 void accept(Block b) throws IOException;
227 }
228
229
230 public static final class EOD extends Block {
231
232
233
234
235 private static final EOD INSTANCE = new EOD();
236
237
238
239
240 public EOD() {
241 super(BlockType.EOD);
242 }
243
244 }
245
246
247
248
249
250
251
252
253
254 public static final class LiteralBlock extends AbstractReference {
255
256 private final byte[] data;
257
258
259
260
261
262
263
264
265 public LiteralBlock(final byte[] data, final int offset, final int length) {
266 super(BlockType.LITERAL, offset, length);
267 this.data = data;
268 }
269
270
271
272
273
274
275
276
277
278
279 public byte[] getData() {
280 return data;
281 }
282
283 }
284
285 static final int NUMBER_OF_BYTES_IN_HASH = 3;
286 private static final int NO_MATCH = -1;
287
288
289 private static final int HASH_SIZE = 1 << 15;
290 private static final int HASH_MASK = HASH_SIZE - 1;
291
292 private static final int H_SHIFT = 5;
293 private final Parameters params;
294 private final Callback callback;
295
296
297 private final byte[] window;
298
299
300
301
302 private final int[] head;
303
304
305
306
307 private final int[] prev;
308
309 private final int wMask;
310 private boolean initialized;
311
312 private int currentPosition;
313
314
315 private int lookahead;
316
317 private int insertHash;
318
319
320
321 private int blockStart;
322
323
324 private int matchStart = NO_MATCH;
325
326
327
328
329 private int missedInserts;
330
331
332
333
334
335
336
337
338 public LZ77Compressor(final Parameters params, final Callback callback) {
339 Objects.requireNonNull(params, "params");
340 Objects.requireNonNull(callback, "callback");
341
342 this.params = params;
343 this.callback = callback;
344
345 final int wSize = params.getWindowSize();
346 window = new byte[wSize * 2];
347 wMask = wSize - 1;
348 head = ArrayFill.fill(new int[HASH_SIZE], NO_MATCH);
349 prev = new int[wSize];
350 }
351
352 private void catchUpMissedInserts() {
353 while (missedInserts > 0) {
354 insertString(currentPosition - missedInserts--);
355 }
356 }
357
358 private void compress() throws IOException {
359 final int minMatch = params.getMinBackReferenceLength();
360 final boolean lazy = params.getLazyMatching();
361 final int lazyThreshold = params.getLazyMatchingThreshold();
362
363 while (lookahead >= minMatch) {
364 catchUpMissedInserts();
365 int matchLength = 0;
366 final int hashHead = insertString(currentPosition);
367 if (hashHead != NO_MATCH && hashHead - currentPosition <= params.getMaxOffset()) {
368
369 matchLength = longestMatch(hashHead);
370
371 if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) {
372
373 matchLength = longestMatchForNextPosition(matchLength);
374 }
375 }
376 if (matchLength >= minMatch) {
377 if (blockStart != currentPosition) {
378
379 flushLiteralBlock();
380 blockStart = NO_MATCH;
381 }
382 flushBackReference(matchLength);
383 insertStringsInMatch(matchLength);
384 lookahead -= matchLength;
385 currentPosition += matchLength;
386 blockStart = currentPosition;
387 } else {
388
389 lookahead--;
390 currentPosition++;
391 if (currentPosition - blockStart >= params.getMaxLiteralLength()) {
392 flushLiteralBlock();
393 blockStart = currentPosition;
394 }
395 }
396 }
397 }
398
399
400
401
402
403
404
405 public void compress(final byte[] data) throws IOException {
406 compress(data, 0, data.length);
407 }
408
409
410
411
412
413
414
415
416
417 public void compress(final byte[] data, int off, int len) throws IOException {
418 final int wSize = params.getWindowSize();
419 while (len > wSize) {
420 doCompress(data, off, wSize);
421 off += wSize;
422 len -= wSize;
423 }
424 if (len > 0) {
425 doCompress(data, off, len);
426 }
427 }
428
429
430 private void doCompress(final byte[] data, final int off, final int len) throws IOException {
431 final int spaceLeft = window.length - currentPosition - lookahead;
432 if (len > spaceLeft) {
433 slide();
434 }
435 System.arraycopy(data, off, window, currentPosition + lookahead, len);
436 lookahead += len;
437 if (!initialized && lookahead >= params.getMinBackReferenceLength()) {
438 initialize();
439 }
440 if (initialized) {
441 compress();
442 }
443 }
444
445
446
447
448
449
450
451
452
453
454 public void finish() throws IOException {
455 if (blockStart != currentPosition || lookahead > 0) {
456 currentPosition += lookahead;
457 flushLiteralBlock();
458 }
459 callback.accept(EOD.INSTANCE);
460 }
461
462 private void flushBackReference(final int matchLength) throws IOException {
463 callback.accept(new BackReference(currentPosition - matchStart, matchLength));
464 }
465
466 private void flushLiteralBlock() throws IOException {
467 callback.accept(new LiteralBlock(window, blockStart, currentPosition - blockStart));
468 }
469
470 private void initialize() {
471 for (int i = 0; i < NUMBER_OF_BYTES_IN_HASH - 1; i++) {
472 insertHash = nextHash(insertHash, window[i]);
473 }
474 initialized = true;
475 }
476
477
478
479
480
481
482
483
484 private int insertString(final int pos) {
485 insertHash = nextHash(insertHash, window[pos - 1 + NUMBER_OF_BYTES_IN_HASH]);
486 final int hashHead = head[insertHash];
487 prev[pos & wMask] = hashHead;
488 head[insertHash] = pos;
489 return hashHead;
490 }
491
492 private void insertStringsInMatch(final int matchLength) {
493
494
495 final int stop = Math.min(matchLength - 1, lookahead - NUMBER_OF_BYTES_IN_HASH);
496
497 for (int i = 1; i <= stop; i++) {
498 insertString(currentPosition + i);
499 }
500 missedInserts = matchLength - stop - 1;
501 }
502
503
504
505
506
507
508
509
510 private int longestMatch(int matchHead) {
511 final int minLength = params.getMinBackReferenceLength();
512 int longestMatchLength = minLength - 1;
513 final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead);
514 final int minIndex = Math.max(0, currentPosition - params.getMaxOffset());
515 final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength());
516 final int maxCandidates = params.getMaxCandidates();
517 for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) {
518 int currentLength = 0;
519 for (int i = 0; i < maxPossibleLength; i++) {
520 if (window[matchHead + i] != window[currentPosition + i]) {
521 break;
522 }
523 currentLength++;
524 }
525 if (currentLength > longestMatchLength) {
526 longestMatchLength = currentLength;
527 matchStart = matchHead;
528 if (currentLength >= niceBackReferenceLength) {
529
530 break;
531 }
532 }
533 matchHead = prev[matchHead & wMask];
534 }
535 return longestMatchLength;
536 }
537
538 private int longestMatchForNextPosition(final int prevMatchLength) {
539
540 final int prevMatchStart = matchStart;
541 final int prevInsertHash = insertHash;
542
543 lookahead--;
544 currentPosition++;
545 final int hashHead = insertString(currentPosition);
546 final int prevHashHead = prev[currentPosition & wMask];
547 int matchLength = longestMatch(hashHead);
548
549 if (matchLength <= prevMatchLength) {
550
551 matchLength = prevMatchLength;
552 matchStart = prevMatchStart;
553
554
555 head[insertHash] = prevHashHead;
556 insertHash = prevInsertHash;
557 currentPosition--;
558 lookahead++;
559 }
560 return matchLength;
561 }
562
563
564
565
566
567
568
569
570
571 private int nextHash(final int oldHash, final byte nextByte) {
572 final int nextVal = nextByte & 0xFF;
573 return (oldHash << H_SHIFT ^ nextVal) & HASH_MASK;
574 }
575
576
577
578
579
580
581
582
583
584
585
586
587 public void prefill(final byte[] data) {
588 if (currentPosition != 0 || lookahead != 0) {
589 throw new IllegalStateException("The compressor has already started to accept data, can't prefill anymore");
590 }
591
592
593 final int len = Math.min(params.getWindowSize(), data.length);
594 System.arraycopy(data, data.length - len, window, 0, len);
595
596 if (len >= NUMBER_OF_BYTES_IN_HASH) {
597 initialize();
598 final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1;
599 for (int i = 0; i < stop; i++) {
600 insertString(i);
601 }
602 missedInserts = NUMBER_OF_BYTES_IN_HASH - 1;
603 } else {
604 missedInserts = len;
605 }
606 blockStart = currentPosition = len;
607 }
608
609 private void slide() throws IOException {
610 final int wSize = params.getWindowSize();
611 if (blockStart != currentPosition && blockStart < wSize) {
612 flushLiteralBlock();
613 blockStart = currentPosition;
614 }
615 System.arraycopy(window, wSize, window, 0, wSize);
616 currentPosition -= wSize;
617 matchStart -= wSize;
618 blockStart -= wSize;
619 for (int i = 0; i < HASH_SIZE; i++) {
620 final int h = head[i];
621 head[i] = h >= wSize ? h - wSize : NO_MATCH;
622 }
623 for (int i = 0; i < wSize; i++) {
624 final int p = prev[i];
625 prev[i] = p >= wSize ? p - wSize : NO_MATCH;
626 }
627 }
628 }