001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.MemoryLimitException;
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BitInputStream;
028import org.apache.commons.compress.utils.InputStreamStatistics;
029
030/**
031 * <p>
032 * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
033 * projects in implementing their own LZW variations.
034 * </p>
035 *
036 * @NotThreadSafe
037 * @since 1.10
038 */
039public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
040
041    /**
042     * Maximum code size in bits.
043     */
044    private static final int MAX_CODE_SIZE = 31;
045
046    /**
047     * Default code size in bits.
048     */
049    protected static final int DEFAULT_CODE_SIZE = 9;
050
051    /**
052     * Unused marker.
053     */
054    protected static final int UNUSED_PREFIX = -1;
055
056    private final byte[] oneByte = new byte[1];
057
058    /**
059     * Input.
060     */
061    protected final BitInputStream in;
062    private int clearCode = -1;
063    private int codeSize = DEFAULT_CODE_SIZE;
064    private byte previousCodeFirstChar;
065    private int previousCode = UNUSED_PREFIX;
066    private int tableSize;
067    private int[] prefixes;
068    private byte[] characters;
069    private byte[] outputStack;
070    private int outputStackLocation;
071
072    /**
073     * Constructs a new instance.
074     *
075     * @param inputStream The underlying input stream.
076     * @param byteOrder the input byte order.
077     */
078    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
079        this.in = new BitInputStream(inputStream, byteOrder);
080    }
081
082    /**
083     * Add a new entry to the dictionary.
084     *
085     * @param previousCode the previous code
086     * @param character    the next character to append
087     * @return the new code
088     * @throws IOException on error
089     */
090    protected abstract int addEntry(int previousCode, byte character) throws IOException;
091
092    /**
093     * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
094     *
095     * @param previousCode the previous code
096     * @param character    the character to append
097     * @param maxTableSize the maximum table size
098     * @return the new code or -1 if maxTableSize has been reached already
099     */
100    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
101        if (tableSize < maxTableSize) {
102            prefixes[tableSize] = previousCode;
103            characters[tableSize] = character;
104            return tableSize++;
105        }
106        return -1;
107    }
108
109    /**
110     * Add entry for repeat of previousCode we haven't added, yet.
111     *
112     * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
113     * @throws IOException on error
114     */
115    protected int addRepeatOfPreviousCode() throws IOException {
116        if (previousCode == -1) {
117            // can't have a repeat for the very first code
118            throw new IOException("The first code can't be a reference to its preceding code");
119        }
120        return addEntry(previousCode, previousCodeFirstChar);
121    }
122
123    @Override
124    public void close() throws IOException {
125        in.close();
126    }
127
128    /**
129     * Reads the next code and expand it.
130     *
131     * @return the expanded next code, negative on EOF
132     * @throws IOException on error
133     */
134    protected abstract int decompressNextSymbol() throws IOException;
135
136    /**
137     * Expands the entry with index code to the output stack and may create a new entry.
138     *
139     * @param code                 the code.
140     * @param addedUnfinishedEntry whether unfinished entries have been added.
141     * @return the new location of the output stack.
142     * @throws IOException if an I/O error occurs.
143     */
144    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
145        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
146            outputStack[--outputStackLocation] = characters[entry];
147        }
148        if (previousCode != -1 && !addedUnfinishedEntry) {
149            addEntry(previousCode, outputStack[outputStackLocation]);
150        }
151        previousCode = code;
152        previousCodeFirstChar = outputStack[outputStackLocation];
153        return outputStackLocation;
154    }
155
156    /**
157     * Gets the clear code.
158     *
159     * @return the clear code.
160     */
161    protected int getClearCode() {
162        return clearCode;
163    }
164
165    /**
166     * Gets the code size in bits.
167     *
168     * @return the code size in bits.
169     */
170    protected int getCodeSize() {
171        return codeSize;
172    }
173
174    /**
175     * @since 1.17
176     */
177    @Override
178    public long getCompressedCount() {
179        return in.getBytesRead();
180    }
181
182    /**
183     * Gets the prefix at the given offset.
184     *
185     * @param offset offset to query.
186     * @return the prefix at the given offset.
187     */
188    protected int getPrefix(final int offset) {
189        return prefixes[offset];
190    }
191
192    /**
193     * Gets the prefixes length.
194     *
195     * @return the prefixes length.
196     */
197    protected int getPrefixesLength() {
198        return prefixes.length;
199    }
200
201    /**
202     * Gets the table size.
203     *
204     * @return the table size.
205     */
206    protected int getTableSize() {
207        return tableSize;
208    }
209
210    /**
211     * Increments the code size by one.
212     */
213    protected void incrementCodeSize() {
214        codeSize++;
215    }
216
217    /**
218     * Initializes the arrays based on the maximum code size.
219     *
220     * @param maxCodeSize maximum code size
221     * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
222     */
223    protected void initializeTables(final int maxCodeSize) {
224        // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
225        // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
226        if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
227            // TODO test against prefixes.length and characters.length?
228            throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
229        }
230        final int maxTableSize = 1 << maxCodeSize;
231        prefixes = new int[maxTableSize];
232        characters = new byte[maxTableSize];
233        outputStack = new byte[maxTableSize];
234        outputStackLocation = maxTableSize;
235        final int max = 1 << 8;
236        for (int i = 0; i < max; i++) {
237            prefixes[i] = -1;
238            characters[i] = (byte) i;
239        }
240    }
241
242    /**
243     * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
244     *
245     * @param maxCodeSize     maximum code size
246     * @param memoryLimiKiB maximum allowed estimated memory usage in kibibytes (KiB).
247     * @throws MemoryLimitException     if estimated memory usage is greater than memoryLimitKiB
248     * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
249     */
250    protected void initializeTables(final int maxCodeSize, final int memoryLimiKiB) throws MemoryLimitException {
251        if (maxCodeSize <= 0) {
252            throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
253        }
254
255        if (memoryLimiKiB > -1) {
256            final int maxTableSize = 1 << maxCodeSize;
257            // account for potential overflow
258            final long memoryUsageBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
259            final long memoryUsageKiB = memoryUsageBytes >> 10;
260
261            if (memoryUsageKiB > memoryLimiKiB) {
262                throw new MemoryLimitException(memoryUsageKiB, memoryLimiKiB);
263            }
264        }
265        initializeTables(maxCodeSize);
266    }
267
268    @Override
269    public int read() throws IOException {
270        final int ret = read(oneByte);
271        if (ret < 0) {
272            return ret;
273        }
274        return 0xff & oneByte[0];
275    }
276
277    @Override
278    public int read(final byte[] b, final int off, final int len) throws IOException {
279        if (len == 0) {
280            return 0;
281        }
282        int bytesRead = readFromStack(b, off, len);
283        while (len - bytesRead > 0) {
284            final int result = decompressNextSymbol();
285            if (result < 0) {
286                if (bytesRead > 0) {
287                    count(bytesRead);
288                    return bytesRead;
289                }
290                return result;
291            }
292            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
293        }
294        count(bytesRead);
295        return bytesRead;
296    }
297
298    private int readFromStack(final byte[] b, final int off, final int len) {
299        final int remainingInStack = outputStack.length - outputStackLocation;
300        if (remainingInStack > 0) {
301            final int maxLength = Math.min(remainingInStack, len);
302            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
303            outputStackLocation += maxLength;
304            return maxLength;
305        }
306        return 0;
307    }
308
309    /**
310     * Reads the next code from the stream.
311     *
312     * @return the next code
313     * @throws IOException on error
314     */
315    protected int readNextCode() throws IOException {
316        if (codeSize > MAX_CODE_SIZE) {
317            throw new IllegalArgumentException("Code size must not be bigger than 31");
318        }
319        return (int) in.readBits(codeSize);
320    }
321
322    /**
323     * Resets the code size to its default value.
324     */
325    protected void resetCodeSize() {
326        setCodeSize(DEFAULT_CODE_SIZE);
327    }
328
329    /**
330     * Resets the previous code to its default value.
331     */
332    protected void resetPreviousCode() {
333        this.previousCode = -1;
334    }
335
336    /**
337     * Sets the clear code based on the code size.
338     *
339     * @param codeSize code size
340     */
341    protected void setClearCode(final int codeSize) {
342        clearCode = 1 << codeSize - 1;
343    }
344
345    /**
346     * Sets the code size in bits.
347     *
348     * @param codeSize the code size in bits.
349     */
350    protected void setCodeSize(final int codeSize) {
351        this.codeSize = codeSize;
352    }
353
354    /**
355     * Sets the prefix at the given offset.
356     *
357     * @param offset the target offset.
358     * @param value the new value.
359     */
360    protected void setPrefix(final int offset, final int value) {
361        prefixes[offset] = value;
362    }
363
364    /**
365     * Sets the table size.
366     *
367     * @param tableSize the new table size.
368     */
369    protected void setTableSize(final int tableSize) {
370        this.tableSize = tableSize;
371    }
372
373}