View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.lzw;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.nio.ByteOrder;
24  
25  import org.apache.commons.compress.MemoryLimitException;
26  import org.apache.commons.compress.compressors.CompressorInputStream;
27  import org.apache.commons.compress.utils.BitInputStream;
28  import org.apache.commons.compress.utils.InputStreamStatistics;
29  
30  /**
31   * <p>
32   * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
33   * projects in implementing their own LZW variations.
34   * </p>
35   *
36   * @NotThreadSafe
37   * @since 1.10
38   */
39  public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
40  
41      /**
42       * Maximum code size in bits.
43       */
44      private static final int MAX_CODE_SIZE = 31;
45  
46      /**
47       * Default code size in bits.
48       */
49      protected static final int DEFAULT_CODE_SIZE = 9;
50  
51      /**
52       * Unused marker.
53       */
54      protected static final int UNUSED_PREFIX = -1;
55  
56      private final byte[] oneByte = new byte[1];
57  
58      /**
59       * Input.
60       */
61      protected final BitInputStream in;
62      private int clearCode = -1;
63      private int codeSize = DEFAULT_CODE_SIZE;
64      private byte previousCodeFirstChar;
65      private int previousCode = UNUSED_PREFIX;
66      private int tableSize;
67      private int[] prefixes;
68      private byte[] characters;
69      private byte[] outputStack;
70      private int outputStackLocation;
71  
72      /**
73       * Constructs a new instance.
74       *
75       * @param inputStream The underlying input stream.
76       * @param byteOrder the input byte order.
77       */
78      protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
79          this.in = new BitInputStream(inputStream, byteOrder);
80      }
81  
82      /**
83       * Add a new entry to the dictionary.
84       *
85       * @param previousCode the previous code
86       * @param character    the next character to append
87       * @return the new code
88       * @throws IOException on error
89       */
90      protected abstract int addEntry(int previousCode, byte character) throws IOException;
91  
92      /**
93       * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
94       *
95       * @param previousCode the previous code
96       * @param character    the character to append
97       * @param maxTableSize the maximum table size
98       * @return the new code or -1 if maxTableSize has been reached already
99       */
100     protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
101         if (tableSize < maxTableSize) {
102             prefixes[tableSize] = previousCode;
103             characters[tableSize] = character;
104             return tableSize++;
105         }
106         return -1;
107     }
108 
109     /**
110      * Add entry for repeat of previousCode we haven't added, yet.
111      *
112      * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
113      * @throws IOException on error
114      */
115     protected int addRepeatOfPreviousCode() throws IOException {
116         if (previousCode == -1) {
117             // can't have a repeat for the very first code
118             throw new IOException("The first code can't be a reference to its preceding code");
119         }
120         return addEntry(previousCode, previousCodeFirstChar);
121     }
122 
123     @Override
124     public void close() throws IOException {
125         in.close();
126     }
127 
128     /**
129      * Reads the next code and expand it.
130      *
131      * @return the expanded next code, negative on EOF
132      * @throws IOException on error
133      */
134     protected abstract int decompressNextSymbol() throws IOException;
135 
136     /**
137      * Expands the entry with index code to the output stack and may create a new entry.
138      *
139      * @param code                 the code.
140      * @param addedUnfinishedEntry whether unfinished entries have been added.
141      * @return the new location of the output stack.
142      * @throws IOException if an I/O error occurs.
143      */
144     protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
145         for (int entry = code; entry >= 0; entry = prefixes[entry]) {
146             outputStack[--outputStackLocation] = characters[entry];
147         }
148         if (previousCode != -1 && !addedUnfinishedEntry) {
149             addEntry(previousCode, outputStack[outputStackLocation]);
150         }
151         previousCode = code;
152         previousCodeFirstChar = outputStack[outputStackLocation];
153         return outputStackLocation;
154     }
155 
156     /**
157      * Gets the clear code.
158      *
159      * @return the clear code.
160      */
161     protected int getClearCode() {
162         return clearCode;
163     }
164 
165     /**
166      * Gets the code size in bits.
167      *
168      * @return the code size in bits.
169      */
170     protected int getCodeSize() {
171         return codeSize;
172     }
173 
174     /**
175      * @since 1.17
176      */
177     @Override
178     public long getCompressedCount() {
179         return in.getBytesRead();
180     }
181 
182     /**
183      * Gets the prefix at the given offset.
184      *
185      * @param offset offset to query.
186      * @return the prefix at the given offset.
187      */
188     protected int getPrefix(final int offset) {
189         return prefixes[offset];
190     }
191 
192     /**
193      * Gets the prefixes length.
194      *
195      * @return the prefixes length.
196      */
197     protected int getPrefixesLength() {
198         return prefixes.length;
199     }
200 
201     /**
202      * Gets the table size.
203      *
204      * @return the table size.
205      */
206     protected int getTableSize() {
207         return tableSize;
208     }
209 
210     /**
211      * Increments the code size by one.
212      */
213     protected void incrementCodeSize() {
214         codeSize++;
215     }
216 
217     /**
218      * Initializes the arrays based on the maximum code size.
219      *
220      * @param maxCodeSize maximum code size
221      * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
222      */
223     protected void initializeTables(final int maxCodeSize) {
224         // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
225         // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
226         if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
227             // TODO test against prefixes.length and characters.length?
228             throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
229         }
230         final int maxTableSize = 1 << maxCodeSize;
231         prefixes = new int[maxTableSize];
232         characters = new byte[maxTableSize];
233         outputStack = new byte[maxTableSize];
234         outputStackLocation = maxTableSize;
235         final int max = 1 << 8;
236         for (int i = 0; i < max; i++) {
237             prefixes[i] = -1;
238             characters[i] = (byte) i;
239         }
240     }
241 
242     /**
243      * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
244      *
245      * @param maxCodeSize     maximum code size
246      * @param memoryLimiKiB maximum allowed estimated memory usage in kibibytes (KiB).
247      * @throws MemoryLimitException     if estimated memory usage is greater than memoryLimitKiB
248      * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
249      */
250     protected void initializeTables(final int maxCodeSize, final int memoryLimiKiB) throws MemoryLimitException {
251         if (maxCodeSize <= 0) {
252             throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
253         }
254 
255         if (memoryLimiKiB > -1) {
256             final int maxTableSize = 1 << maxCodeSize;
257             // account for potential overflow
258             final long memoryUsageBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
259             final long memoryUsageKiB = memoryUsageBytes >> 10;
260 
261             if (memoryUsageKiB > memoryLimiKiB) {
262                 throw new MemoryLimitException(memoryUsageKiB, memoryLimiKiB);
263             }
264         }
265         initializeTables(maxCodeSize);
266     }
267 
268     @Override
269     public int read() throws IOException {
270         final int ret = read(oneByte);
271         if (ret < 0) {
272             return ret;
273         }
274         return 0xff & oneByte[0];
275     }
276 
277     @Override
278     public int read(final byte[] b, final int off, final int len) throws IOException {
279         if (len == 0) {
280             return 0;
281         }
282         int bytesRead = readFromStack(b, off, len);
283         while (len - bytesRead > 0) {
284             final int result = decompressNextSymbol();
285             if (result < 0) {
286                 if (bytesRead > 0) {
287                     count(bytesRead);
288                     return bytesRead;
289                 }
290                 return result;
291             }
292             bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
293         }
294         count(bytesRead);
295         return bytesRead;
296     }
297 
298     private int readFromStack(final byte[] b, final int off, final int len) {
299         final int remainingInStack = outputStack.length - outputStackLocation;
300         if (remainingInStack > 0) {
301             final int maxLength = Math.min(remainingInStack, len);
302             System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
303             outputStackLocation += maxLength;
304             return maxLength;
305         }
306         return 0;
307     }
308 
309     /**
310      * Reads the next code from the stream.
311      *
312      * @return the next code
313      * @throws IOException on error
314      */
315     protected int readNextCode() throws IOException {
316         if (codeSize > MAX_CODE_SIZE) {
317             throw new IllegalArgumentException("Code size must not be bigger than 31");
318         }
319         return (int) in.readBits(codeSize);
320     }
321 
322     /**
323      * Resets the code size to its default value.
324      */
325     protected void resetCodeSize() {
326         setCodeSize(DEFAULT_CODE_SIZE);
327     }
328 
329     /**
330      * Resets the previous code to its default value.
331      */
332     protected void resetPreviousCode() {
333         this.previousCode = -1;
334     }
335 
336     /**
337      * Sets the clear code based on the code size.
338      *
339      * @param codeSize code size
340      */
341     protected void setClearCode(final int codeSize) {
342         clearCode = 1 << codeSize - 1;
343     }
344 
345     /**
346      * Sets the code size in bits.
347      *
348      * @param codeSize the code size in bits.
349      */
350     protected void setCodeSize(final int codeSize) {
351         this.codeSize = codeSize;
352     }
353 
354     /**
355      * Sets the prefix at the given offset.
356      *
357      * @param offset the target offset.
358      * @param value the new value.
359      */
360     protected void setPrefix(final int offset, final int value) {
361         prefixes[offset] = value;
362     }
363 
364     /**
365      * Sets the table size.
366      *
367      * @param tableSize the new table size.
368      */
369     protected void setTableSize(final int tableSize) {
370         this.tableSize = tableSize;
371     }
372 
373 }