1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.commons.compress.compressors.lz77support;
20
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.Objects;
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 public class LZ77Compressor {
79
80
81
82
83 public static final class BackReference extends Block {
84 private final int offset, length;
85
86 public BackReference(final int offset, final int length) {
87 this.offset = offset;
88 this.length = length;
89 }
90
91
92
93
94
95
96 public int getLength() {
97 return length;
98 }
99
100
101
102
103
104
105 public int getOffset() {
106 return offset;
107 }
108
109 @Override
110 public BlockType getType() {
111 return BlockType.BACK_REFERENCE;
112 }
113
114 @Override
115 public String toString() {
116 return "BackReference with offset " + offset + " and length " + length;
117 }
118 }
119
120
121
122
123
124
125
126
127
128 public abstract static class Block {
129
130 public enum BlockType {
131 LITERAL, BACK_REFERENCE, EOD
132 }
133
134 public abstract BlockType getType();
135 }
136
137
138
139
140
141
142
143
144
145 public interface Callback {
146
147
148
149
150
151
152 void accept(Block b) throws IOException;
153 }
154
155
156 public static final class EOD extends Block {
157 @Override
158 public BlockType getType() {
159 return BlockType.EOD;
160 }
161 }
162
163
164
165
166
167
168
169
170
171 public static final class LiteralBlock extends Block {
172 private final byte[] data;
173 private final int offset, length;
174
175 public LiteralBlock(final byte[] data, final int offset, final int length) {
176 this.data = data;
177 this.offset = offset;
178 this.length = length;
179 }
180
181
182
183
184
185
186
187
188
189
190 public byte[] getData() {
191 return data;
192 }
193
194
195
196
197
198
199 public int getLength() {
200 return length;
201 }
202
203
204
205
206
207
208 public int getOffset() {
209 return offset;
210 }
211
212 @Override
213 public BlockType getType() {
214 return BlockType.LITERAL;
215 }
216
217 @Override
218 public String toString() {
219 return "LiteralBlock starting at " + offset + " with length " + length;
220 }
221 }
222
223 private static final Block THE_EOD = new EOD();
224
225 static final int NUMBER_OF_BYTES_IN_HASH = 3;
226 private static final int NO_MATCH = -1;
227
228
229 private static final int HASH_SIZE = 1 << 15;
230 private static final int HASH_MASK = HASH_SIZE - 1;
231
232 private static final int H_SHIFT = 5;
233 private final Parameters params;
234 private final Callback callback;
235
236
237 private final byte[] window;
238
239
240
241
242 private final int[] head;
243
244
245
246
247 private final int[] prev;
248
249 private final int wMask;
250 private boolean initialized;
251
252 private int currentPosition;
253
254
255 private int lookahead;
256
257 private int insertHash;
258
259
260
261 private int blockStart;
262
263
264 private int matchStart = NO_MATCH;
265
266
267
268
269 private int missedInserts;
270
271
272
273
274
275
276
277
278 public LZ77Compressor(final Parameters params, final Callback callback) {
279 Objects.requireNonNull(params, "params");
280 Objects.requireNonNull(callback, "callback");
281
282 this.params = params;
283 this.callback = callback;
284
285 final int wSize = params.getWindowSize();
286 window = new byte[wSize * 2];
287 wMask = wSize - 1;
288 head = new int[HASH_SIZE];
289 Arrays.fill(head, NO_MATCH);
290 prev = new int[wSize];
291 }
292
293 private void catchUpMissedInserts() {
294 while (missedInserts > 0) {
295 insertString(currentPosition - missedInserts--);
296 }
297 }
298
299 private void compress() throws IOException {
300 final int minMatch = params.getMinBackReferenceLength();
301 final boolean lazy = params.getLazyMatching();
302 final int lazyThreshold = params.getLazyMatchingThreshold();
303
304 while (lookahead >= minMatch) {
305 catchUpMissedInserts();
306 int matchLength = 0;
307 final int hashHead = insertString(currentPosition);
308 if (hashHead != NO_MATCH && hashHead - currentPosition <= params.getMaxOffset()) {
309
310 matchLength = longestMatch(hashHead);
311
312 if (lazy && matchLength <= lazyThreshold && lookahead > minMatch) {
313
314 matchLength = longestMatchForNextPosition(matchLength);
315 }
316 }
317 if (matchLength >= minMatch) {
318 if (blockStart != currentPosition) {
319
320 flushLiteralBlock();
321 blockStart = NO_MATCH;
322 }
323 flushBackReference(matchLength);
324 insertStringsInMatch(matchLength);
325 lookahead -= matchLength;
326 currentPosition += matchLength;
327 blockStart = currentPosition;
328 } else {
329
330 lookahead--;
331 currentPosition++;
332 if (currentPosition - blockStart >= params.getMaxLiteralLength()) {
333 flushLiteralBlock();
334 blockStart = currentPosition;
335 }
336 }
337 }
338 }
339
340
341
342
343
344
345
346 public void compress(final byte[] data) throws IOException {
347 compress(data, 0, data.length);
348 }
349
350
351
352
353
354
355
356
357
358 public void compress(final byte[] data, int off, int len) throws IOException {
359 final int wSize = params.getWindowSize();
360 while (len > wSize) {
361 doCompress(data, off, wSize);
362 off += wSize;
363 len -= wSize;
364 }
365 if (len > 0) {
366 doCompress(data, off, len);
367 }
368 }
369
370
371 private void doCompress(final byte[] data, final int off, final int len) throws IOException {
372 final int spaceLeft = window.length - currentPosition - lookahead;
373 if (len > spaceLeft) {
374 slide();
375 }
376 System.arraycopy(data, off, window, currentPosition + lookahead, len);
377 lookahead += len;
378 if (!initialized && lookahead >= params.getMinBackReferenceLength()) {
379 initialize();
380 }
381 if (initialized) {
382 compress();
383 }
384 }
385
386
387
388
389
390
391
392
393
394
395 public void finish() throws IOException {
396 if (blockStart != currentPosition || lookahead > 0) {
397 currentPosition += lookahead;
398 flushLiteralBlock();
399 }
400 callback.accept(THE_EOD);
401 }
402
403 private void flushBackReference(final int matchLength) throws IOException {
404 callback.accept(new BackReference(currentPosition - matchStart, matchLength));
405 }
406
407 private void flushLiteralBlock() throws IOException {
408 callback.accept(new LiteralBlock(window, blockStart, currentPosition - blockStart));
409 }
410
411 private void initialize() {
412 for (int i = 0; i < NUMBER_OF_BYTES_IN_HASH - 1; i++) {
413 insertHash = nextHash(insertHash, window[i]);
414 }
415 initialized = true;
416 }
417
418
419
420
421
422
423
424
425 private int insertString(final int pos) {
426 insertHash = nextHash(insertHash, window[pos - 1 + NUMBER_OF_BYTES_IN_HASH]);
427 final int hashHead = head[insertHash];
428 prev[pos & wMask] = hashHead;
429 head[insertHash] = pos;
430 return hashHead;
431 }
432
433 private void insertStringsInMatch(final int matchLength) {
434
435
436 final int stop = Math.min(matchLength - 1, lookahead - NUMBER_OF_BYTES_IN_HASH);
437
438 for (int i = 1; i <= stop; i++) {
439 insertString(currentPosition + i);
440 }
441 missedInserts = matchLength - stop - 1;
442 }
443
444
445
446
447
448
449
450
451 private int longestMatch(int matchHead) {
452 final int minLength = params.getMinBackReferenceLength();
453 int longestMatchLength = minLength - 1;
454 final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead);
455 final int minIndex = Math.max(0, currentPosition - params.getMaxOffset());
456 final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength());
457 final int maxCandidates = params.getMaxCandidates();
458 for (int candidates = 0; candidates < maxCandidates && matchHead >= minIndex; candidates++) {
459 int currentLength = 0;
460 for (int i = 0; i < maxPossibleLength; i++) {
461 if (window[matchHead + i] != window[currentPosition + i]) {
462 break;
463 }
464 currentLength++;
465 }
466 if (currentLength > longestMatchLength) {
467 longestMatchLength = currentLength;
468 matchStart = matchHead;
469 if (currentLength >= niceBackReferenceLength) {
470
471 break;
472 }
473 }
474 matchHead = prev[matchHead & wMask];
475 }
476 return longestMatchLength;
477 }
478
479 private int longestMatchForNextPosition(final int prevMatchLength) {
480
481 final int prevMatchStart = matchStart;
482 final int prevInsertHash = insertHash;
483
484 lookahead--;
485 currentPosition++;
486 final int hashHead = insertString(currentPosition);
487 final int prevHashHead = prev[currentPosition & wMask];
488 int matchLength = longestMatch(hashHead);
489
490 if (matchLength <= prevMatchLength) {
491
492 matchLength = prevMatchLength;
493 matchStart = prevMatchStart;
494
495
496 head[insertHash] = prevHashHead;
497 insertHash = prevInsertHash;
498 currentPosition--;
499 lookahead++;
500 }
501 return matchLength;
502 }
503
504
505
506
507
508
509
510
511
512 private int nextHash(final int oldHash, final byte nextByte) {
513 final int nextVal = nextByte & 0xFF;
514 return (oldHash << H_SHIFT ^ nextVal) & HASH_MASK;
515 }
516
517
518
519
520
521
522
523
524
525
526
527
528 public void prefill(final byte[] data) {
529 if (currentPosition != 0 || lookahead != 0) {
530 throw new IllegalStateException("The compressor has already started to accept data, can't prefill anymore");
531 }
532
533
534 final int len = Math.min(params.getWindowSize(), data.length);
535 System.arraycopy(data, data.length - len, window, 0, len);
536
537 if (len >= NUMBER_OF_BYTES_IN_HASH) {
538 initialize();
539 final int stop = len - NUMBER_OF_BYTES_IN_HASH + 1;
540 for (int i = 0; i < stop; i++) {
541 insertString(i);
542 }
543 missedInserts = NUMBER_OF_BYTES_IN_HASH - 1;
544 } else {
545 missedInserts = len;
546 }
547 blockStart = currentPosition = len;
548 }
549
550 private void slide() throws IOException {
551 final int wSize = params.getWindowSize();
552 if (blockStart != currentPosition && blockStart < wSize) {
553 flushLiteralBlock();
554 blockStart = currentPosition;
555 }
556 System.arraycopy(window, wSize, window, 0, wSize);
557 currentPosition -= wSize;
558 matchStart -= wSize;
559 blockStart -= wSize;
560 for (int i = 0; i < HASH_SIZE; i++) {
561 final int h = head[i];
562 head[i] = h >= wSize ? h - wSize : NO_MATCH;
563 }
564 for (int i = 0; i < wSize; i++) {
565 final int p = prev[i];
566 prev[i] = p >= wSize ? p - wSize : NO_MATCH;
567 }
568 }
569 }