View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.commons.compress.compressors.deflate64;
18  
19  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.DYNAMIC_CODES;
20  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.FIXED_CODES;
21  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.INITIAL;
22  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.STORED;
23  
24  import java.io.Closeable;
25  import java.io.EOFException;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.nio.ByteOrder;
29  import java.util.Arrays;
30  
31  import org.apache.commons.compress.utils.BitInputStream;
32  import org.apache.commons.compress.utils.ByteUtils;
33  import org.apache.commons.compress.utils.ExactMath;
34  
35  /**
36   * TODO This class can't be final because it is mocked by Mockito.
37   */
38  class HuffmanDecoder implements Closeable {
39  
40      private static final class BinaryTreeNode {
41          private final int bits;
42          int literal = -1;
43          BinaryTreeNode leftNode;
44          BinaryTreeNode rightNode;
45  
46          private BinaryTreeNode(final int bits) {
47              this.bits = bits;
48          }
49  
50          void leaf(final int symbol) {
51              literal = symbol;
52              leftNode = null;
53              rightNode = null;
54          }
55  
56          BinaryTreeNode left() {
57              if (leftNode == null && literal == -1) {
58                  leftNode = new BinaryTreeNode(bits + 1);
59              }
60              return leftNode;
61          }
62  
63          BinaryTreeNode right() {
64              if (rightNode == null && literal == -1) {
65                  rightNode = new BinaryTreeNode(bits + 1);
66              }
67              return rightNode;
68          }
69      }
70  
71      private abstract static class DecoderState {
72          abstract int available() throws IOException;
73  
74          abstract boolean hasData();
75  
76          abstract int read(byte[] b, int off, int len) throws IOException;
77  
78          abstract HuffmanState state();
79      }
80  
81      private static final class DecodingMemory {
82          private final byte[] memory;
83          private final int mask;
84          private int wHead;
85          private boolean wrappedAround;
86  
87          private DecodingMemory() {
88              this(16);
89          }
90  
91          private DecodingMemory(final int bits) {
92              memory = new byte[1 << bits];
93              mask = memory.length - 1;
94          }
95  
96          byte add(final byte b) {
97              memory[wHead] = b;
98              wHead = incCounter(wHead);
99              return b;
100         }
101 
102         void add(final byte[] b, final int off, final int len) {
103             for (int i = off; i < off + len; i++) {
104                 add(b[i]);
105             }
106         }
107 
108         private int incCounter(final int counter) {
109             final int newCounter = counter + 1 & mask;
110             if (!wrappedAround && newCounter < counter) {
111                 wrappedAround = true;
112             }
113             return newCounter;
114         }
115 
116         void recordToBuffer(final int distance, final int length, final byte[] buff) {
117             if (distance > memory.length) {
118                 throw new IllegalStateException("Illegal distance parameter: " + distance);
119             }
120             final int start = wHead - distance & mask;
121             if (!wrappedAround && start >= wHead) {
122                 throw new IllegalStateException("Attempt to read beyond memory: dist=" + distance);
123             }
124             for (int i = 0, pos = start; i < length; i++, pos = incCounter(pos)) {
125                 buff[i] = add(memory[pos]);
126             }
127         }
128     }
129 
130     private final class HuffmanCodes extends DecoderState {
131         private boolean endOfBlock;
132         private final HuffmanState state;
133         private final BinaryTreeNode lengthTree;
134         private final BinaryTreeNode distanceTree;
135 
136         private int runBufferPos;
137         private byte[] runBuffer = ByteUtils.EMPTY_BYTE_ARRAY;
138         private int runBufferLength;
139 
140         HuffmanCodes(final HuffmanState state, final int[] lengths, final int[] distance) {
141             this.state = state;
142             lengthTree = buildTree(lengths);
143             distanceTree = buildTree(distance);
144         }
145 
146         @Override
147         int available() {
148             return runBufferLength - runBufferPos;
149         }
150 
151         private int copyFromRunBuffer(final byte[] b, final int off, final int len) {
152             final int bytesInBuffer = runBufferLength - runBufferPos;
153             int copiedBytes = 0;
154             if (bytesInBuffer > 0) {
155                 copiedBytes = Math.min(len, bytesInBuffer);
156                 System.arraycopy(runBuffer, runBufferPos, b, off, copiedBytes);
157                 runBufferPos += copiedBytes;
158             }
159             return copiedBytes;
160         }
161 
162         private int decodeNext(final byte[] b, final int off, final int len) throws IOException {
163             if (endOfBlock) {
164                 return -1;
165             }
166             int result = copyFromRunBuffer(b, off, len);
167 
168             while (result < len) {
169                 final int symbol = nextSymbol(reader, lengthTree);
170                 if (symbol < 256) {
171                     b[off + result++] = memory.add((byte) symbol);
172                 } else if (symbol > 256) {
173                     final int runMask = RUN_LENGTH_TABLE[symbol - 257];
174                     int run = runMask >>> 5;
175                     final int runXtra = runMask & 0x1F;
176                     run = ExactMath.add(run, readBits(runXtra));
177 
178                     final int distSym = nextSymbol(reader, distanceTree);
179 
180                     final int distMask = DISTANCE_TABLE[distSym];
181                     int dist = distMask >>> 4;
182                     final int distXtra = distMask & 0xF;
183                     dist = ExactMath.add(dist, readBits(distXtra));
184 
185                     if (runBuffer.length < run) {
186                         runBuffer = new byte[run];
187                     }
188                     runBufferLength = run;
189                     runBufferPos = 0;
190                     memory.recordToBuffer(dist, run, runBuffer);
191 
192                     result += copyFromRunBuffer(b, off + result, len - result);
193                 } else {
194                     endOfBlock = true;
195                     return result;
196                 }
197             }
198 
199             return result;
200         }
201 
202         @Override
203         boolean hasData() {
204             return !endOfBlock;
205         }
206 
207         @Override
208         int read(final byte[] b, final int off, final int len) throws IOException {
209             if (len == 0) {
210                 return 0;
211             }
212             return decodeNext(b, off, len);
213         }
214 
215         @Override
216         HuffmanState state() {
217             return endOfBlock ? INITIAL : state;
218         }
219     }
220 
221     private static final class InitialState extends DecoderState {
222         @Override
223         int available() {
224             return 0;
225         }
226 
227         @Override
228         boolean hasData() {
229             return false;
230         }
231 
232         @Override
233         int read(final byte[] b, final int off, final int len) throws IOException {
234             if (len == 0) {
235                 return 0;
236             }
237             throw new IllegalStateException("Cannot read in this state");
238         }
239 
240         @Override
241         HuffmanState state() {
242             return INITIAL;
243         }
244     }
245 
246     private final class UncompressedState extends DecoderState {
247         private final long blockLength;
248         private long read;
249 
250         private UncompressedState(final long blockLength) {
251             this.blockLength = blockLength;
252         }
253 
254         @Override
255         int available() throws IOException {
256             return (int) Math.min(blockLength - read, reader.bitsAvailable() / Byte.SIZE);
257         }
258 
259         @Override
260         boolean hasData() {
261             return read < blockLength;
262         }
263 
264         @Override
265         int read(final byte[] b, final int off, final int len) throws IOException {
266             if (len == 0) {
267                 return 0;
268             }
269             // as len is an int and (blockLength - read) is >= 0 the min must fit into an int as well
270             final int max = (int) Math.min(blockLength - read, len);
271             int readSoFar = 0;
272             while (readSoFar < max) {
273                 final int readNow;
274                 if (reader.bitsCached() > 0) {
275                     final byte next = (byte) readBits(Byte.SIZE);
276                     b[off + readSoFar] = memory.add(next);
277                     readNow = 1;
278                 } else {
279                     readNow = in.read(b, off + readSoFar, max - readSoFar);
280                     if (readNow == -1) {
281                         throw new EOFException("Truncated Deflate64 Stream");
282                     }
283                     memory.add(b, off + readSoFar, readNow);
284                 }
285                 read += readNow;
286                 readSoFar += readNow;
287             }
288             return max;
289         }
290 
291         @Override
292         HuffmanState state() {
293             return read < blockLength ? STORED : INITIAL;
294         }
295     }
296 
297     /**
298      * <pre>
299      * --------------------------------------------------------------------
300      * idx  xtra  base     idx  xtra  base     idx  xtra  base
301      * --------------------------------------------------------------------
302      * 257   0     3       267   1   15,16     277   4   67-82
303      * 258   0     4       268   1   17,18     278   4   83-98
304      * 259   0     5       269   2   19-22     279   4   99-114
305      * 260   0     6       270   2   23-26     280   4   115-130
306      * 261   0     7       271   2   27-30     281   5   131-162
307      * 262   0     8       272   2   31-34     282   5   163-194
308      * 263   0     9       273   3   35-42     283   5   195-226
309      * 264   0     10      274   3   43-50     284   5   227-257
310      * 265   1     11,12   275   3   51-58     285   16  3
311      * 266   1     13,14   276   3   59-66
312      * --------------------------------------------------------------------
313      * </pre>
314      *
315      * value = (base of run length) << 5 | (number of extra bits to read)
316      */
317     private static final short[] RUN_LENGTH_TABLE = { 96, 128, 160, 192, 224, 256, 288, 320, 353, 417, 481, 545, 610, 738, 866, 994, 1123, 1379, 1635, 1891,
318             2148, 2660, 3172, 3684, 4197, 5221, 6245, 7269, 112 };
319     /**
320      * <pre>
321      * --------------------------------------------------------------------
322      * idx  xtra  dist     idx  xtra  dist       idx  xtra  dist
323      * --------------------------------------------------------------------
324      * 0    0     1        10   4     33-48      20    9   1025-1536
325      * 1    0     2        11   4     49-64      21    9   1537-2048
326      * 2    0     3        12   5     65-96      22   10   2049-3072
327      * 3    0     4        13   5     97-128     23   10   3073-4096
328      * 4    1     5,6      14   6     129-192    24   11   4097-6144
329      * 5    1     7,8      15   6     193-256    25   11   6145-8192
330      * 6    2     9-12     16   7     257-384    26   12   8193-12288
331      * 7    2     13-16    17   7     385-512    27   12   12289-16384
332      * 8    3     17-24    18   8     513-768    28   13   16385-24576
333      * 9    3     25-32    19   8     769-1024   29   13   24577-32768
334      * 30   14   32769-49152
335      * 31   14   49153-65536
336      * --------------------------------------------------------------------
337      * </pre>
338      *
339      * value = (base of distance) << 4 | (number of extra bits to read)
340      */
341     private static final int[] DISTANCE_TABLE = { 16, 32, 48, 64, 81, 113, 146, 210, 275, 403, // 0-9
342             532, 788, 1045, 1557, 2070, 3094, 4119, 6167, 8216, 12312, // 10-19
343             16409, 24601, 32794, 49178, 65563, 98331, 131100, 196636, 262173, 393245, // 20-29
344             524318, 786462 // 30-31
345     };
346     /**
347      * When using dynamic huffman codes the order in which the values are stored follows the positioning below
348      */
349     private static final int[] CODE_LENGTHS_ORDER = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
350     /**
351      * Huffman Fixed Literal / Distance tables for mode 1
352      */
353     private static final int[] FIXED_LITERALS;
354 
355     private static final int[] FIXED_DISTANCE;
356 
357     static {
358         FIXED_LITERALS = new int[288];
359         Arrays.fill(FIXED_LITERALS, 0, 144, 8);
360         Arrays.fill(FIXED_LITERALS, 144, 256, 9);
361         Arrays.fill(FIXED_LITERALS, 256, 280, 7);
362         Arrays.fill(FIXED_LITERALS, 280, 288, 8);
363 
364         FIXED_DISTANCE = new int[32];
365         Arrays.fill(FIXED_DISTANCE, 5);
366     }
367 
368     private static BinaryTreeNode buildTree(final int[] litTable) {
369         final int[] literalCodes = getCodes(litTable);
370 
371         final BinaryTreeNode root = new BinaryTreeNode(0);
372 
373         for (int i = 0; i < litTable.length; i++) {
374             final int len = litTable[i];
375             if (len != 0) {
376                 BinaryTreeNode node = root;
377                 final int lit = literalCodes[len - 1];
378                 for (int p = len - 1; p >= 0; p--) {
379                     final int bit = lit & 1 << p;
380                     node = bit == 0 ? node.left() : node.right();
381                     if (node == null) {
382                         throw new IllegalStateException("node doesn't exist in Huffman tree");
383                     }
384                 }
385                 node.leaf(i);
386                 literalCodes[len - 1]++;
387             }
388         }
389         return root;
390     }
391 
392     private static int[] getCodes(final int[] litTable) {
393         int max = 0;
394         int[] blCount = new int[65];
395 
396         for (final int aLitTable : litTable) {
397             if (aLitTable < 0 || aLitTable > 64) {
398                 throw new IllegalArgumentException("Invalid code " + aLitTable + " in literal table");
399             }
400             max = Math.max(max, aLitTable);
401             blCount[aLitTable]++;
402         }
403         blCount = Arrays.copyOf(blCount, max + 1);
404 
405         int code = 0;
406         final int[] nextCode = new int[max + 1];
407         for (int i = 0; i <= max; i++) {
408             code = code + blCount[i] << 1;
409             nextCode[i] = code;
410         }
411 
412         return nextCode;
413     }
414 
415     private static int nextSymbol(final BitInputStream reader, final BinaryTreeNode tree) throws IOException {
416         BinaryTreeNode node = tree;
417         while (node != null && node.literal == -1) {
418             final long bit = readBits(reader, 1);
419             node = bit == 0 ? node.leftNode : node.rightNode;
420         }
421         return node != null ? node.literal : -1;
422     }
423 
424     private static void populateDynamicTables(final BitInputStream reader, final int[] literals, final int[] distances) throws IOException {
425         final int codeLengths = (int) (readBits(reader, 4) + 4);
426 
427         final int[] codeLengthValues = new int[19];
428         for (int cLen = 0; cLen < codeLengths; cLen++) {
429             codeLengthValues[CODE_LENGTHS_ORDER[cLen]] = (int) readBits(reader, 3);
430         }
431 
432         final BinaryTreeNode codeLengthTree = buildTree(codeLengthValues);
433 
434         final int[] auxBuffer = new int[literals.length + distances.length];
435 
436         int value = -1;
437         int length = 0;
438         int off = 0;
439         while (off < auxBuffer.length) {
440             if (length > 0) {
441                 auxBuffer[off++] = value;
442                 length--;
443             } else {
444                 final int symbol = nextSymbol(reader, codeLengthTree);
445                 if (symbol < 16) {
446                     value = symbol;
447                     auxBuffer[off++] = value;
448                 } else {
449                     switch (symbol) {
450                     case 16:
451                         length = (int) (readBits(reader, 2) + 3);
452                         break;
453                     case 17:
454                         value = 0;
455                         length = (int) (readBits(reader, 3) + 3);
456                         break;
457                     case 18:
458                         value = 0;
459                         length = (int) (readBits(reader, 7) + 11);
460                         break;
461                     default:
462                         break;
463                     }
464                 }
465             }
466         }
467 
468         System.arraycopy(auxBuffer, 0, literals, 0, literals.length);
469         System.arraycopy(auxBuffer, literals.length, distances, 0, distances.length);
470     }
471 
472     private static long readBits(final BitInputStream reader, final int numBits) throws IOException {
473         final long r = reader.readBits(numBits);
474         if (r == -1) {
475             throw new EOFException("Truncated Deflate64 Stream");
476         }
477         return r;
478     }
479 
480     private boolean finalBlock;
481 
482     private DecoderState state;
483 
484     private BitInputStream reader;
485 
486     private final InputStream in;
487 
488     private final DecodingMemory memory = new DecodingMemory();
489 
490     HuffmanDecoder(final InputStream in) {
491         this.reader = new BitInputStream(in, ByteOrder.LITTLE_ENDIAN);
492         this.in = in;
493         state = new InitialState();
494     }
495 
496     int available() throws IOException {
497         return state.available();
498     }
499 
500     @Override
501     public void close() {
502         state = new InitialState();
503         reader = null;
504     }
505 
506     public int decode(final byte[] b) throws IOException {
507         return decode(b, 0, b.length);
508     }
509 
510     public int decode(final byte[] b, final int off, final int len) throws IOException {
511         while (!finalBlock || state.hasData()) {
512             if (state.state() == INITIAL) {
513                 finalBlock = readBits(1) == 1;
514                 final int mode = (int) readBits(2);
515                 switch (mode) {
516                 case 0:
517                     switchToUncompressedState();
518                     break;
519                 case 1:
520                     state = new HuffmanCodes(FIXED_CODES, FIXED_LITERALS, FIXED_DISTANCE);
521                     break;
522                 case 2:
523                     final int[][] tables = readDynamicTables();
524                     state = new HuffmanCodes(DYNAMIC_CODES, tables[0], tables[1]);
525                     break;
526                 default:
527                     throw new IllegalStateException("Unsupported compression: " + mode);
528                 }
529             } else {
530                 final int r = state.read(b, off, len);
531                 if (r != 0) {
532                     return r;
533                 }
534             }
535         }
536         return -1;
537     }
538 
539     /**
540      * @since 1.17
541      */
542     long getBytesRead() {
543         return reader.getBytesRead();
544     }
545 
546     private long readBits(final int numBits) throws IOException {
547         return readBits(reader, numBits);
548     }
549 
550     private int[][] readDynamicTables() throws IOException {
551         final int[][] result = new int[2][];
552         final int literals = (int) (readBits(5) + 257);
553         result[0] = new int[literals];
554 
555         final int distances = (int) (readBits(5) + 1);
556         result[1] = new int[distances];
557 
558         populateDynamicTables(reader, result[0], result[1]);
559         return result;
560     }
561 
562     private void switchToUncompressedState() throws IOException {
563         reader.alignWithByteBoundary();
564         final long bLen = readBits(16);
565         final long bNLen = readBits(16);
566         if (((bLen ^ 0xFFFF) & 0xFFFF) != bNLen) {
567             // noinspection DuplicateStringLiteralInspection
568             throw new IllegalStateException("Illegal LEN / NLEN values");
569         }
570         state = new UncompressedState(bLen);
571     }
572 }