001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lzw;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.MemoryLimitException;
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BitInputStream;
028import org.apache.commons.compress.utils.InputStreamStatistics;
029
030/**
031 * <p>
032 * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
033 * projects in implementing their own LZW variations.
034 * </p>
035 *
036 * @NotThreadSafe
037 * @since 1.10
038 */
039public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
040    protected static final int DEFAULT_CODE_SIZE = 9;
041    protected static final int UNUSED_PREFIX = -1;
042
043    private final byte[] oneByte = new byte[1];
044
045    protected final BitInputStream in;
046    private int clearCode = -1;
047    private int codeSize = DEFAULT_CODE_SIZE;
048    private byte previousCodeFirstChar;
049    private int previousCode = UNUSED_PREFIX;
050    private int tableSize;
051    private int[] prefixes;
052    private byte[] characters;
053    private byte[] outputStack;
054    private int outputStackLocation;
055
056    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
057        this.in = new BitInputStream(inputStream, byteOrder);
058    }
059
060    /**
061     * Add a new entry to the dictionary.
062     *
063     * @param previousCode the previous code
064     * @param character    the next character to append
065     * @return the new code
066     * @throws IOException on error
067     */
068    protected abstract int addEntry(int previousCode, byte character) throws IOException;
069
070    /**
071     * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
072     *
073     * @param previousCode the previous code
074     * @param character    the character to append
075     * @param maxTableSize the maximum table size
076     * @return the new code or -1 if maxTableSize has been reached already
077     */
078    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
079        if (tableSize < maxTableSize) {
080            prefixes[tableSize] = previousCode;
081            characters[tableSize] = character;
082            return tableSize++;
083        }
084        return -1;
085    }
086
087    /**
088     * Add entry for repeat of previousCode we haven't added, yet.
089     *
090     * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
091     * @throws IOException on error
092     */
093    protected int addRepeatOfPreviousCode() throws IOException {
094        if (previousCode == -1) {
095            // can't have a repeat for the very first code
096            throw new IOException("The first code can't be a reference to its preceding code");
097        }
098        return addEntry(previousCode, previousCodeFirstChar);
099    }
100
101    @Override
102    public void close() throws IOException {
103        in.close();
104    }
105
106    /**
107     * Read the next code and expand it.
108     *
109     * @return the expanded next code, negative on EOF
110     * @throws IOException on error
111     */
112    protected abstract int decompressNextSymbol() throws IOException;
113
114    /**
115     * Expands the entry with index code to the output stack and may create a new entry
116     *
117     * @param code                 the code
118     * @param addedUnfinishedEntry whether unfinished entries have been added
119     * @return the new location of the output stack
120     * @throws IOException on error
121     */
122    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
123        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
124            outputStack[--outputStackLocation] = characters[entry];
125        }
126        if (previousCode != -1 && !addedUnfinishedEntry) {
127            addEntry(previousCode, outputStack[outputStackLocation]);
128        }
129        previousCode = code;
130        previousCodeFirstChar = outputStack[outputStackLocation];
131        return outputStackLocation;
132    }
133
134    protected int getClearCode() {
135        return clearCode;
136    }
137
138    protected int getCodeSize() {
139        return codeSize;
140    }
141
142    /**
143     * @since 1.17
144     */
145    @Override
146    public long getCompressedCount() {
147        return in.getBytesRead();
148    }
149
150    protected int getPrefix(final int offset) {
151        return prefixes[offset];
152    }
153
154    protected int getPrefixesLength() {
155        return prefixes.length;
156    }
157
158    protected int getTableSize() {
159        return tableSize;
160    }
161
162    protected void incrementCodeSize() {
163        codeSize++;
164    }
165
166    /**
167     * Initializes the arrays based on the maximum code size.
168     *
169     * @param maxCodeSize maximum code size
170     * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
171     */
172    protected void initializeTables(final int maxCodeSize) {
173        // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
174        // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
175        if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
176            // TODO test against prefixes.length and characters.length?
177            throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
178        }
179        final int maxTableSize = 1 << maxCodeSize;
180        prefixes = new int[maxTableSize];
181        characters = new byte[maxTableSize];
182        outputStack = new byte[maxTableSize];
183        outputStackLocation = maxTableSize;
184        final int max = 1 << 8;
185        for (int i = 0; i < max; i++) {
186            prefixes[i] = -1;
187            characters[i] = (byte) i;
188        }
189    }
190
191    /**
192     * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
193     *
194     * @param maxCodeSize     maximum code size
195     * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
196     * @throws MemoryLimitException     if estimated memory usage is greater than memoryLimitInKb
197     * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
198     */
199    protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) throws MemoryLimitException {
200        if (maxCodeSize <= 0) {
201            throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
202        }
203
204        if (memoryLimitInKb > -1) {
205            final int maxTableSize = 1 << maxCodeSize;
206            // account for potential overflow
207            final long memoryUsageInBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
208            final long memoryUsageInKb = memoryUsageInBytes >> 10;
209
210            if (memoryUsageInKb > memoryLimitInKb) {
211                throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
212            }
213        }
214        initializeTables(maxCodeSize);
215    }
216
217    @Override
218    public int read() throws IOException {
219        final int ret = read(oneByte);
220        if (ret < 0) {
221            return ret;
222        }
223        return 0xff & oneByte[0];
224    }
225
226    @Override
227    public int read(final byte[] b, final int off, final int len) throws IOException {
228        if (len == 0) {
229            return 0;
230        }
231        int bytesRead = readFromStack(b, off, len);
232        while (len - bytesRead > 0) {
233            final int result = decompressNextSymbol();
234            if (result < 0) {
235                if (bytesRead > 0) {
236                    count(bytesRead);
237                    return bytesRead;
238                }
239                return result;
240            }
241            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
242        }
243        count(bytesRead);
244        return bytesRead;
245    }
246
247    private int readFromStack(final byte[] b, final int off, final int len) {
248        final int remainingInStack = outputStack.length - outputStackLocation;
249        if (remainingInStack > 0) {
250            final int maxLength = Math.min(remainingInStack, len);
251            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
252            outputStackLocation += maxLength;
253            return maxLength;
254        }
255        return 0;
256    }
257
258    /**
259     * Reads the next code from the stream.
260     *
261     * @return the next code
262     * @throws IOException on error
263     */
264    protected int readNextCode() throws IOException {
265        if (codeSize > 31) {
266            throw new IllegalArgumentException("Code size must not be bigger than 31");
267        }
268        return (int) in.readBits(codeSize);
269    }
270
271    protected void resetCodeSize() {
272        setCodeSize(DEFAULT_CODE_SIZE);
273    }
274
275    protected void resetPreviousCode() {
276        this.previousCode = -1;
277    }
278
279    /**
280     * Sets the clear code based on the code size.
281     *
282     * @param codeSize code size
283     */
284    protected void setClearCode(final int codeSize) {
285        clearCode = 1 << codeSize - 1;
286    }
287
288    protected void setCodeSize(final int cs) {
289        this.codeSize = cs;
290    }
291
292    protected void setPrefix(final int offset, final int value) {
293        prefixes[offset] = value;
294    }
295
296    protected void setTableSize(final int newSize) {
297        tableSize = newSize;
298    }
299
300}