LZWInputStream.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- package org.apache.commons.compress.compressors.lzw;
- import java.io.IOException;
- import java.io.InputStream;
- import java.nio.ByteOrder;
- import org.apache.commons.compress.MemoryLimitException;
- import org.apache.commons.compress.compressors.CompressorInputStream;
- import org.apache.commons.compress.utils.BitInputStream;
- import org.apache.commons.compress.utils.InputStreamStatistics;
- /**
- * <p>
- * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
- * projects in implementing their own LZW variations.
- * </p>
- *
- * @NotThreadSafe
- * @since 1.10
- */
- public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
- protected static final int DEFAULT_CODE_SIZE = 9;
- protected static final int UNUSED_PREFIX = -1;
- private final byte[] oneByte = new byte[1];
- protected final BitInputStream in;
- private int clearCode = -1;
- private int codeSize = DEFAULT_CODE_SIZE;
- private byte previousCodeFirstChar;
- private int previousCode = UNUSED_PREFIX;
- private int tableSize;
- private int[] prefixes;
- private byte[] characters;
- private byte[] outputStack;
- private int outputStackLocation;
- protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
- this.in = new BitInputStream(inputStream, byteOrder);
- }
- /**
- * Add a new entry to the dictionary.
- *
- * @param previousCode the previous code
- * @param character the next character to append
- * @return the new code
- * @throws IOException on error
- */
- protected abstract int addEntry(int previousCode, byte character) throws IOException;
- /**
- * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
- *
- * @param previousCode the previous code
- * @param character the character to append
- * @param maxTableSize the maximum table size
- * @return the new code or -1 if maxTableSize has been reached already
- */
- protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
- if (tableSize < maxTableSize) {
- prefixes[tableSize] = previousCode;
- characters[tableSize] = character;
- return tableSize++;
- }
- return -1;
- }
- /**
- * Add entry for repeat of previousCode we haven't added, yet.
- *
- * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
- * @throws IOException on error
- */
- protected int addRepeatOfPreviousCode() throws IOException {
- if (previousCode == -1) {
- // can't have a repeat for the very first code
- throw new IOException("The first code can't be a reference to its preceding code");
- }
- return addEntry(previousCode, previousCodeFirstChar);
- }
- @Override
- public void close() throws IOException {
- in.close();
- }
- /**
- * Read the next code and expand it.
- *
- * @return the expanded next code, negative on EOF
- * @throws IOException on error
- */
- protected abstract int decompressNextSymbol() throws IOException;
- /**
- * Expands the entry with index code to the output stack and may create a new entry
- *
- * @param code the code
- * @param addedUnfinishedEntry whether unfinished entries have been added
- * @return the new location of the output stack
- * @throws IOException on error
- */
- protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
- for (int entry = code; entry >= 0; entry = prefixes[entry]) {
- outputStack[--outputStackLocation] = characters[entry];
- }
- if (previousCode != -1 && !addedUnfinishedEntry) {
- addEntry(previousCode, outputStack[outputStackLocation]);
- }
- previousCode = code;
- previousCodeFirstChar = outputStack[outputStackLocation];
- return outputStackLocation;
- }
- protected int getClearCode() {
- return clearCode;
- }
- protected int getCodeSize() {
- return codeSize;
- }
- /**
- * @since 1.17
- */
- @Override
- public long getCompressedCount() {
- return in.getBytesRead();
- }
- protected int getPrefix(final int offset) {
- return prefixes[offset];
- }
- protected int getPrefixesLength() {
- return prefixes.length;
- }
- protected int getTableSize() {
- return tableSize;
- }
- protected void incrementCodeSize() {
- codeSize++;
- }
- /**
- * Initializes the arrays based on the maximum code size.
- *
- * @param maxCodeSize maximum code size
- * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
- */
- protected void initializeTables(final int maxCodeSize) {
- // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
- // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
- if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
- // TODO test against prefixes.length and characters.length?
- throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
- }
- final int maxTableSize = 1 << maxCodeSize;
- prefixes = new int[maxTableSize];
- characters = new byte[maxTableSize];
- outputStack = new byte[maxTableSize];
- outputStackLocation = maxTableSize;
- final int max = 1 << 8;
- for (int i = 0; i < max; i++) {
- prefixes[i] = -1;
- characters[i] = (byte) i;
- }
- }
- /**
- * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
- *
- * @param maxCodeSize maximum code size
- * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
- * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
- * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
- */
- protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) throws MemoryLimitException {
- if (maxCodeSize <= 0) {
- throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
- }
- if (memoryLimitInKb > -1) {
- final int maxTableSize = 1 << maxCodeSize;
- // account for potential overflow
- final long memoryUsageInBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
- final long memoryUsageInKb = memoryUsageInBytes >> 10;
- if (memoryUsageInKb > memoryLimitInKb) {
- throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
- }
- }
- initializeTables(maxCodeSize);
- }
- @Override
- public int read() throws IOException {
- final int ret = read(oneByte);
- if (ret < 0) {
- return ret;
- }
- return 0xff & oneByte[0];
- }
- @Override
- public int read(final byte[] b, final int off, final int len) throws IOException {
- if (len == 0) {
- return 0;
- }
- int bytesRead = readFromStack(b, off, len);
- while (len - bytesRead > 0) {
- final int result = decompressNextSymbol();
- if (result < 0) {
- if (bytesRead > 0) {
- count(bytesRead);
- return bytesRead;
- }
- return result;
- }
- bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
- }
- count(bytesRead);
- return bytesRead;
- }
- private int readFromStack(final byte[] b, final int off, final int len) {
- final int remainingInStack = outputStack.length - outputStackLocation;
- if (remainingInStack > 0) {
- final int maxLength = Math.min(remainingInStack, len);
- System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
- outputStackLocation += maxLength;
- return maxLength;
- }
- return 0;
- }
- /**
- * Reads the next code from the stream.
- *
- * @return the next code
- * @throws IOException on error
- */
- protected int readNextCode() throws IOException {
- if (codeSize > 31) {
- throw new IllegalArgumentException("Code size must not be bigger than 31");
- }
- return (int) in.readBits(codeSize);
- }
- protected void resetCodeSize() {
- setCodeSize(DEFAULT_CODE_SIZE);
- }
- protected void resetPreviousCode() {
- this.previousCode = -1;
- }
- /**
- * Sets the clear code based on the code size.
- *
- * @param codeSize code size
- */
- protected void setClearCode(final int codeSize) {
- clearCode = 1 << codeSize - 1;
- }
- protected void setCodeSize(final int cs) {
- this.codeSize = cs;
- }
- protected void setPrefix(final int offset, final int value) {
- prefixes[offset] = value;
- }
- protected void setTableSize(final int newSize) {
- tableSize = newSize;
- }
- }