View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.deflate64;
20  
21  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.DYNAMIC_CODES;
22  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.FIXED_CODES;
23  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.INITIAL;
24  import static org.apache.commons.compress.compressors.deflate64.HuffmanState.STORED;
25  
26  import java.io.Closeable;
27  import java.io.EOFException;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.nio.ByteOrder;
31  import java.util.Arrays;
32  
33  import org.apache.commons.compress.utils.BitInputStream;
34  import org.apache.commons.compress.utils.ByteUtils;
35  import org.apache.commons.compress.utils.ExactMath;
36  import org.apache.commons.lang3.ArrayFill;
37  
38  /**
39   * TODO This class can't be final because it is mocked by Mockito.
40   */
41  class HuffmanDecoder implements Closeable {
42  
43      private static final class BinaryTreeNode {
44          private final int bits;
45          int literal = -1;
46          BinaryTreeNode leftNode;
47          BinaryTreeNode rightNode;
48  
49          private BinaryTreeNode(final int bits) {
50              this.bits = bits;
51          }
52  
53          void leaf(final int symbol) {
54              literal = symbol;
55              leftNode = null;
56              rightNode = null;
57          }
58  
59          BinaryTreeNode left() {
60              if (leftNode == null && literal == -1) {
61                  leftNode = new BinaryTreeNode(bits + 1);
62              }
63              return leftNode;
64          }
65  
66          BinaryTreeNode right() {
67              if (rightNode == null && literal == -1) {
68                  rightNode = new BinaryTreeNode(bits + 1);
69              }
70              return rightNode;
71          }
72      }
73  
74      private abstract static class DecoderState {
75          abstract int available() throws IOException;
76  
77          abstract boolean hasData();
78  
79          abstract int read(byte[] b, int off, int len) throws IOException;
80  
81          abstract HuffmanState state();
82      }
83  
84      private static final class DecodingMemory {
85          private final byte[] memory;
86          private final int mask;
87          private int wHead;
88          private boolean wrappedAround;
89  
90          private DecodingMemory() {
91              this(16);
92          }
93  
94          private DecodingMemory(final int bits) {
95              memory = new byte[1 << bits];
96              mask = memory.length - 1;
97          }
98  
99          byte add(final byte b) {
100             memory[wHead] = b;
101             wHead = incCounter(wHead);
102             return b;
103         }
104 
105         void add(final byte[] b, final int off, final int len) {
106             for (int i = off; i < off + len; i++) {
107                 add(b[i]);
108             }
109         }
110 
111         private int incCounter(final int counter) {
112             final int newCounter = counter + 1 & mask;
113             if (!wrappedAround && newCounter < counter) {
114                 wrappedAround = true;
115             }
116             return newCounter;
117         }
118 
119         void recordToBuffer(final int distance, final int length, final byte[] buff) {
120             if (distance > memory.length) {
121                 throw new IllegalStateException("Illegal distance parameter: " + distance);
122             }
123             final int start = wHead - distance & mask;
124             if (!wrappedAround && start >= wHead) {
125                 throw new IllegalStateException("Attempt to read beyond memory: dist=" + distance);
126             }
127             for (int i = 0, pos = start; i < length; i++, pos = incCounter(pos)) {
128                 buff[i] = add(memory[pos]);
129             }
130         }
131     }
132 
133     private final class HuffmanCodes extends DecoderState {
134         private boolean endOfBlock;
135         private final HuffmanState state;
136         private final BinaryTreeNode lengthTree;
137         private final BinaryTreeNode distanceTree;
138 
139         private int runBufferPos;
140         private byte[] runBuffer = ByteUtils.EMPTY_BYTE_ARRAY;
141         private int runBufferLength;
142 
143         HuffmanCodes(final HuffmanState state, final int[] lengths, final int[] distance) {
144             this.state = state;
145             lengthTree = buildTree(lengths);
146             distanceTree = buildTree(distance);
147         }
148 
149         @Override
150         int available() {
151             return runBufferLength - runBufferPos;
152         }
153 
154         private int copyFromRunBuffer(final byte[] b, final int off, final int len) {
155             final int bytesInBuffer = runBufferLength - runBufferPos;
156             int copiedBytes = 0;
157             if (bytesInBuffer > 0) {
158                 copiedBytes = Math.min(len, bytesInBuffer);
159                 System.arraycopy(runBuffer, runBufferPos, b, off, copiedBytes);
160                 runBufferPos += copiedBytes;
161             }
162             return copiedBytes;
163         }
164 
165         private int decodeNext(final byte[] b, final int off, final int len) throws IOException {
166             if (endOfBlock) {
167                 return -1;
168             }
169             int result = copyFromRunBuffer(b, off, len);
170 
171             while (result < len) {
172                 final int symbol = nextSymbol(reader, lengthTree);
173                 if (symbol < 256) {
174                     b[off + result++] = memory.add((byte) symbol);
175                 } else if (symbol > 256) {
176                     final int runMask = RUN_LENGTH_TABLE[symbol - 257];
177                     int run = runMask >>> 5;
178                     final int runXtra = runMask & 0x1F;
179                     run = ExactMath.add(run, readBits(runXtra));
180 
181                     final int distSym = nextSymbol(reader, distanceTree);
182 
183                     final int distMask = DISTANCE_TABLE[distSym];
184                     int dist = distMask >>> 4;
185                     final int distXtra = distMask & 0xF;
186                     dist = ExactMath.add(dist, readBits(distXtra));
187 
188                     if (runBuffer.length < run) {
189                         runBuffer = new byte[run];
190                     }
191                     runBufferLength = run;
192                     runBufferPos = 0;
193                     memory.recordToBuffer(dist, run, runBuffer);
194 
195                     result += copyFromRunBuffer(b, off + result, len - result);
196                 } else {
197                     endOfBlock = true;
198                     return result;
199                 }
200             }
201 
202             return result;
203         }
204 
205         @Override
206         boolean hasData() {
207             return !endOfBlock;
208         }
209 
210         @Override
211         int read(final byte[] b, final int off, final int len) throws IOException {
212             if (len == 0) {
213                 return 0;
214             }
215             return decodeNext(b, off, len);
216         }
217 
218         @Override
219         HuffmanState state() {
220             return endOfBlock ? INITIAL : state;
221         }
222     }
223 
224     private static final class InitialState extends DecoderState {
225         @Override
226         int available() {
227             return 0;
228         }
229 
230         @Override
231         boolean hasData() {
232             return false;
233         }
234 
235         @Override
236         int read(final byte[] b, final int off, final int len) throws IOException {
237             if (len == 0) {
238                 return 0;
239             }
240             throw new IllegalStateException("Cannot read in this state");
241         }
242 
243         @Override
244         HuffmanState state() {
245             return INITIAL;
246         }
247     }
248 
249     private final class UncompressedState extends DecoderState {
250         private final long blockLength;
251         private long read;
252 
253         private UncompressedState(final long blockLength) {
254             this.blockLength = blockLength;
255         }
256 
257         @Override
258         int available() throws IOException {
259             return (int) Math.min(blockLength - read, reader.bitsAvailable() / Byte.SIZE);
260         }
261 
262         @Override
263         boolean hasData() {
264             return read < blockLength;
265         }
266 
267         @Override
268         int read(final byte[] b, final int off, final int len) throws IOException {
269             if (len == 0) {
270                 return 0;
271             }
272             // as len is an int and (blockLength - read) is >= 0 the min must fit into an int as well
273             final int max = (int) Math.min(blockLength - read, len);
274             int readSoFar = 0;
275             while (readSoFar < max) {
276                 final int readNow;
277                 if (reader.bitsCached() > 0) {
278                     final byte next = (byte) readBits(Byte.SIZE);
279                     b[off + readSoFar] = memory.add(next);
280                     readNow = 1;
281                 } else {
282                     readNow = in.read(b, off + readSoFar, max - readSoFar);
283                     if (readNow == -1) {
284                         throw new EOFException("Truncated Deflate64 Stream");
285                     }
286                     memory.add(b, off + readSoFar, readNow);
287                 }
288                 read += readNow;
289                 readSoFar += readNow;
290             }
291             return max;
292         }
293 
294         @Override
295         HuffmanState state() {
296             return read < blockLength ? STORED : INITIAL;
297         }
298     }
299 
300     /**
301      * <pre>
302      * --------------------------------------------------------------------
303      * idx  xtra  base     idx  xtra  base     idx  xtra  base
304      * --------------------------------------------------------------------
305      * 257   0     3       267   1   15,16     277   4   67-82
306      * 258   0     4       268   1   17,18     278   4   83-98
307      * 259   0     5       269   2   19-22     279   4   99-114
308      * 260   0     6       270   2   23-26     280   4   115-130
309      * 261   0     7       271   2   27-30     281   5   131-162
310      * 262   0     8       272   2   31-34     282   5   163-194
311      * 263   0     9       273   3   35-42     283   5   195-226
312      * 264   0     10      274   3   43-50     284   5   227-257
313      * 265   1     11,12   275   3   51-58     285   16  3
314      * 266   1     13,14   276   3   59-66
315      * --------------------------------------------------------------------
316      * </pre>
317      *
318      * value = (base of run length) << 5 | (number of extra bits to read)
319      */
320     private static final short[] RUN_LENGTH_TABLE = { 96, 128, 160, 192, 224, 256, 288, 320, 353, 417, 481, 545, 610, 738, 866, 994, 1123, 1379, 1635, 1891,
321             2148, 2660, 3172, 3684, 4197, 5221, 6245, 7269, 112 };
322     /**
323      * <pre>
324      * --------------------------------------------------------------------
325      * idx  xtra  dist     idx  xtra  dist       idx  xtra  dist
326      * --------------------------------------------------------------------
327      * 0    0     1        10   4     33-48      20    9   1025-1536
328      * 1    0     2        11   4     49-64      21    9   1537-2048
329      * 2    0     3        12   5     65-96      22   10   2049-3072
330      * 3    0     4        13   5     97-128     23   10   3073-4096
331      * 4    1     5,6      14   6     129-192    24   11   4097-6144
332      * 5    1     7,8      15   6     193-256    25   11   6145-8192
333      * 6    2     9-12     16   7     257-384    26   12   8193-12288
334      * 7    2     13-16    17   7     385-512    27   12   12289-16384
335      * 8    3     17-24    18   8     513-768    28   13   16385-24576
336      * 9    3     25-32    19   8     769-1024   29   13   24577-32768
337      * 30   14   32769-49152
338      * 31   14   49153-65536
339      * --------------------------------------------------------------------
340      * </pre>
341      *
342      * value = (base of distance) << 4 | (number of extra bits to read)
343      */
344     private static final int[] DISTANCE_TABLE = { 16, 32, 48, 64, 81, 113, 146, 210, 275, 403, // 0-9
345             532, 788, 1045, 1557, 2070, 3094, 4119, 6167, 8216, 12312, // 10-19
346             16409, 24601, 32794, 49178, 65563, 98331, 131100, 196636, 262173, 393245, // 20-29
347             524318, 786462 // 30-31
348     };
349     /**
350      * When using dynamic huffman codes the order in which the values are stored follows the positioning below
351      */
352     private static final int[] CODE_LENGTHS_ORDER = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
353     /**
354      * Huffman Fixed Literal / Distance tables for mode 1
355      */
356     private static final int[] FIXED_LITERALS;
357 
358     private static final int[] FIXED_DISTANCE;
359 
360     static {
361         FIXED_LITERALS = new int[288];
362         Arrays.fill(FIXED_LITERALS, 0, 144, 8);
363         Arrays.fill(FIXED_LITERALS, 144, 256, 9);
364         Arrays.fill(FIXED_LITERALS, 256, 280, 7);
365         Arrays.fill(FIXED_LITERALS, 280, 288, 8);
366 
367         FIXED_DISTANCE = ArrayFill.fill(new int[32], 5);
368     }
369 
370     private static BinaryTreeNode buildTree(final int[] litTable) {
371         final int[] literalCodes = getCodes(litTable);
372 
373         final BinaryTreeNode root = new BinaryTreeNode(0);
374 
375         for (int i = 0; i < litTable.length; i++) {
376             final int len = litTable[i];
377             if (len != 0) {
378                 BinaryTreeNode node = root;
379                 final int lit = literalCodes[len - 1];
380                 for (int p = len - 1; p >= 0; p--) {
381                     final int bit = lit & 1 << p;
382                     node = bit == 0 ? node.left() : node.right();
383                     if (node == null) {
384                         throw new IllegalStateException("node doesn't exist in Huffman tree");
385                     }
386                 }
387                 node.leaf(i);
388                 literalCodes[len - 1]++;
389             }
390         }
391         return root;
392     }
393 
394     private static int[] getCodes(final int[] litTable) {
395         int max = 0;
396         int[] blCount = new int[65];
397 
398         for (final int aLitTable : litTable) {
399             if (aLitTable < 0 || aLitTable > 64) {
400                 throw new IllegalArgumentException("Invalid code " + aLitTable + " in literal table");
401             }
402             max = Math.max(max, aLitTable);
403             blCount[aLitTable]++;
404         }
405         blCount = Arrays.copyOf(blCount, max + 1);
406 
407         int code = 0;
408         final int[] nextCode = new int[max + 1];
409         for (int i = 0; i <= max; i++) {
410             code = code + blCount[i] << 1;
411             nextCode[i] = code;
412         }
413 
414         return nextCode;
415     }
416 
417     private static int nextSymbol(final BitInputStream reader, final BinaryTreeNode tree) throws IOException {
418         BinaryTreeNode node = tree;
419         while (node != null && node.literal == -1) {
420             final long bit = readBits(reader, 1);
421             node = bit == 0 ? node.leftNode : node.rightNode;
422         }
423         return node != null ? node.literal : -1;
424     }
425 
426     private static void populateDynamicTables(final BitInputStream reader, final int[] literals, final int[] distances) throws IOException {
427         final int codeLengths = (int) (readBits(reader, 4) + 4);
428 
429         final int[] codeLengthValues = new int[19];
430         for (int cLen = 0; cLen < codeLengths; cLen++) {
431             codeLengthValues[CODE_LENGTHS_ORDER[cLen]] = (int) readBits(reader, 3);
432         }
433 
434         final BinaryTreeNode codeLengthTree = buildTree(codeLengthValues);
435 
436         final int[] auxBuffer = new int[literals.length + distances.length];
437 
438         int value = -1;
439         int length = 0;
440         int off = 0;
441         while (off < auxBuffer.length) {
442             if (length > 0) {
443                 auxBuffer[off++] = value;
444                 length--;
445             } else {
446                 final int symbol = nextSymbol(reader, codeLengthTree);
447                 if (symbol < 16) {
448                     value = symbol;
449                     auxBuffer[off++] = value;
450                 } else {
451                     switch (symbol) {
452                     case 16:
453                         length = (int) (readBits(reader, 2) + 3);
454                         break;
455                     case 17:
456                         value = 0;
457                         length = (int) (readBits(reader, 3) + 3);
458                         break;
459                     case 18:
460                         value = 0;
461                         length = (int) (readBits(reader, 7) + 11);
462                         break;
463                     default:
464                         break;
465                     }
466                 }
467             }
468         }
469 
470         System.arraycopy(auxBuffer, 0, literals, 0, literals.length);
471         System.arraycopy(auxBuffer, literals.length, distances, 0, distances.length);
472     }
473 
474     private static long readBits(final BitInputStream reader, final int numBits) throws IOException {
475         final long r = reader.readBits(numBits);
476         if (r == -1) {
477             throw new EOFException("Truncated Deflate64 Stream");
478         }
479         return r;
480     }
481 
482     private boolean finalBlock;
483 
484     private DecoderState state;
485 
486     private BitInputStream reader;
487 
488     private final InputStream in;
489 
490     private final DecodingMemory memory = new DecodingMemory();
491 
492     HuffmanDecoder(final InputStream in) {
493         this.reader = new BitInputStream(in, ByteOrder.LITTLE_ENDIAN);
494         this.in = in;
495         state = new InitialState();
496     }
497 
498     int available() throws IOException {
499         return state.available();
500     }
501 
502     @Override
503     public void close() {
504         state = new InitialState();
505         reader = null;
506     }
507 
508     public int decode(final byte[] b) throws IOException {
509         return decode(b, 0, b.length);
510     }
511 
512     public int decode(final byte[] b, final int off, final int len) throws IOException {
513         while (!finalBlock || state.hasData()) {
514             if (state.state() == INITIAL) {
515                 finalBlock = readBits(1) == 1;
516                 final int mode = (int) readBits(2);
517                 switch (mode) {
518                 case 0:
519                     switchToUncompressedState();
520                     break;
521                 case 1:
522                     state = new HuffmanCodes(FIXED_CODES, FIXED_LITERALS, FIXED_DISTANCE);
523                     break;
524                 case 2:
525                     final int[][] tables = readDynamicTables();
526                     state = new HuffmanCodes(DYNAMIC_CODES, tables[0], tables[1]);
527                     break;
528                 default:
529                     throw new IllegalStateException("Unsupported compression: " + mode);
530                 }
531             } else {
532                 final int r = state.read(b, off, len);
533                 if (r != 0) {
534                     return r;
535                 }
536             }
537         }
538         return -1;
539     }
540 
541     /**
542      * @since 1.17
543      */
544     long getBytesRead() {
545         return reader.getBytesRead();
546     }
547 
548     private long readBits(final int numBits) throws IOException {
549         return readBits(reader, numBits);
550     }
551 
552     private int[][] readDynamicTables() throws IOException {
553         final int[][] result = new int[2][];
554         final int literals = (int) (readBits(5) + 257);
555         result[0] = new int[literals];
556 
557         final int distances = (int) (readBits(5) + 1);
558         result[1] = new int[distances];
559 
560         populateDynamicTables(reader, result[0], result[1]);
561         return result;
562     }
563 
564     private void switchToUncompressedState() throws IOException {
565         reader.alignWithByteBoundary();
566         final long bLen = readBits(16);
567         final long bNLen = readBits(16);
568         if (((bLen ^ 0xFFFF) & 0xFFFF) != bNLen) {
569             // noinspection DuplicateStringLiteralInspection
570             throw new IllegalStateException("Illegal LEN / NLEN values");
571         }
572         state = new UncompressedState(bLen);
573     }
574 }