LZWInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.compressors.lzw;

  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.nio.ByteOrder;

  23. import org.apache.commons.compress.MemoryLimitException;
  24. import org.apache.commons.compress.compressors.CompressorInputStream;
  25. import org.apache.commons.compress.utils.BitInputStream;
  26. import org.apache.commons.compress.utils.InputStreamStatistics;

  27. /**
  28.  * <p>
  29.  * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party
  30.  * projects in implementing their own LZW variations.
  31.  * </p>
  32.  *
  33.  * @NotThreadSafe
  34.  * @since 1.10
  35.  */
  36. public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
  37.     protected static final int DEFAULT_CODE_SIZE = 9;
  38.     protected static final int UNUSED_PREFIX = -1;

  39.     private final byte[] oneByte = new byte[1];

  40.     protected final BitInputStream in;
  41.     private int clearCode = -1;
  42.     private int codeSize = DEFAULT_CODE_SIZE;
  43.     private byte previousCodeFirstChar;
  44.     private int previousCode = UNUSED_PREFIX;
  45.     private int tableSize;
  46.     private int[] prefixes;
  47.     private byte[] characters;
  48.     private byte[] outputStack;
  49.     private int outputStackLocation;

  50.     protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
  51.         this.in = new BitInputStream(inputStream, byteOrder);
  52.     }

  53.     /**
  54.      * Add a new entry to the dictionary.
  55.      *
  56.      * @param previousCode the previous code
  57.      * @param character    the next character to append
  58.      * @return the new code
  59.      * @throws IOException on error
  60.      */
  61.     protected abstract int addEntry(int previousCode, byte character) throws IOException;

  62.     /**
  63.      * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index.
  64.      *
  65.      * @param previousCode the previous code
  66.      * @param character    the character to append
  67.      * @param maxTableSize the maximum table size
  68.      * @return the new code or -1 if maxTableSize has been reached already
  69.      */
  70.     protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
  71.         if (tableSize < maxTableSize) {
  72.             prefixes[tableSize] = previousCode;
  73.             characters[tableSize] = character;
  74.             return tableSize++;
  75.         }
  76.         return -1;
  77.     }

  78.     /**
  79.      * Add entry for repeat of previousCode we haven't added, yet.
  80.      *
  81.      * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already
  82.      * @throws IOException on error
  83.      */
  84.     protected int addRepeatOfPreviousCode() throws IOException {
  85.         if (previousCode == -1) {
  86.             // can't have a repeat for the very first code
  87.             throw new IOException("The first code can't be a reference to its preceding code");
  88.         }
  89.         return addEntry(previousCode, previousCodeFirstChar);
  90.     }

  91.     @Override
  92.     public void close() throws IOException {
  93.         in.close();
  94.     }

  95.     /**
  96.      * Read the next code and expand it.
  97.      *
  98.      * @return the expanded next code, negative on EOF
  99.      * @throws IOException on error
  100.      */
  101.     protected abstract int decompressNextSymbol() throws IOException;

  102.     /**
  103.      * Expands the entry with index code to the output stack and may create a new entry
  104.      *
  105.      * @param code                 the code
  106.      * @param addedUnfinishedEntry whether unfinished entries have been added
  107.      * @return the new location of the output stack
  108.      * @throws IOException on error
  109.      */
  110.     protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException {
  111.         for (int entry = code; entry >= 0; entry = prefixes[entry]) {
  112.             outputStack[--outputStackLocation] = characters[entry];
  113.         }
  114.         if (previousCode != -1 && !addedUnfinishedEntry) {
  115.             addEntry(previousCode, outputStack[outputStackLocation]);
  116.         }
  117.         previousCode = code;
  118.         previousCodeFirstChar = outputStack[outputStackLocation];
  119.         return outputStackLocation;
  120.     }

  121.     protected int getClearCode() {
  122.         return clearCode;
  123.     }

  124.     protected int getCodeSize() {
  125.         return codeSize;
  126.     }

  127.     /**
  128.      * @since 1.17
  129.      */
  130.     @Override
  131.     public long getCompressedCount() {
  132.         return in.getBytesRead();
  133.     }

  134.     protected int getPrefix(final int offset) {
  135.         return prefixes[offset];
  136.     }

  137.     protected int getPrefixesLength() {
  138.         return prefixes.length;
  139.     }

  140.     protected int getTableSize() {
  141.         return tableSize;
  142.     }

  143.     protected void incrementCodeSize() {
  144.         codeSize++;
  145.     }

  146.     /**
  147.      * Initializes the arrays based on the maximum code size.
  148.      *
  149.      * @param maxCodeSize maximum code size
  150.      * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}.
  151.      */
  152.     protected void initializeTables(final int maxCodeSize) {
  153.         // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException
  154.         // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException
  155.         if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) {
  156.             // TODO test against prefixes.length and characters.length?
  157.             throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds.");
  158.         }
  159.         final int maxTableSize = 1 << maxCodeSize;
  160.         prefixes = new int[maxTableSize];
  161.         characters = new byte[maxTableSize];
  162.         outputStack = new byte[maxTableSize];
  163.         outputStackLocation = maxTableSize;
  164.         final int max = 1 << 8;
  165.         for (int i = 0; i < max; i++) {
  166.             prefixes[i] = -1;
  167.             characters[i] = (byte) i;
  168.         }
  169.     }

  170.     /**
  171.      * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb
  172.      *
  173.      * @param maxCodeSize     maximum code size
  174.      * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
  175.      * @throws MemoryLimitException     if estimated memory usage is greater than memoryLimitInKb
  176.      * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0
  177.      */
  178.     protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb) throws MemoryLimitException {
  179.         if (maxCodeSize <= 0) {
  180.             throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0");
  181.         }

  182.         if (memoryLimitInKb > -1) {
  183.             final int maxTableSize = 1 << maxCodeSize;
  184.             // account for potential overflow
  185.             final long memoryUsageInBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack))
  186.             final long memoryUsageInKb = memoryUsageInBytes >> 10;

  187.             if (memoryUsageInKb > memoryLimitInKb) {
  188.                 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
  189.             }
  190.         }
  191.         initializeTables(maxCodeSize);
  192.     }

  193.     @Override
  194.     public int read() throws IOException {
  195.         final int ret = read(oneByte);
  196.         if (ret < 0) {
  197.             return ret;
  198.         }
  199.         return 0xff & oneByte[0];
  200.     }

  201.     @Override
  202.     public int read(final byte[] b, final int off, final int len) throws IOException {
  203.         if (len == 0) {
  204.             return 0;
  205.         }
  206.         int bytesRead = readFromStack(b, off, len);
  207.         while (len - bytesRead > 0) {
  208.             final int result = decompressNextSymbol();
  209.             if (result < 0) {
  210.                 if (bytesRead > 0) {
  211.                     count(bytesRead);
  212.                     return bytesRead;
  213.                 }
  214.                 return result;
  215.             }
  216.             bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
  217.         }
  218.         count(bytesRead);
  219.         return bytesRead;
  220.     }

  221.     private int readFromStack(final byte[] b, final int off, final int len) {
  222.         final int remainingInStack = outputStack.length - outputStackLocation;
  223.         if (remainingInStack > 0) {
  224.             final int maxLength = Math.min(remainingInStack, len);
  225.             System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
  226.             outputStackLocation += maxLength;
  227.             return maxLength;
  228.         }
  229.         return 0;
  230.     }

  231.     /**
  232.      * Reads the next code from the stream.
  233.      *
  234.      * @return the next code
  235.      * @throws IOException on error
  236.      */
  237.     protected int readNextCode() throws IOException {
  238.         if (codeSize > 31) {
  239.             throw new IllegalArgumentException("Code size must not be bigger than 31");
  240.         }
  241.         return (int) in.readBits(codeSize);
  242.     }

  243.     protected void resetCodeSize() {
  244.         setCodeSize(DEFAULT_CODE_SIZE);
  245.     }

  246.     protected void resetPreviousCode() {
  247.         this.previousCode = -1;
  248.     }

  249.     /**
  250.      * Sets the clear code based on the code size.
  251.      *
  252.      * @param codeSize code size
  253.      */
  254.     protected void setClearCode(final int codeSize) {
  255.         clearCode = 1 << codeSize - 1;
  256.     }

  257.     protected void setCodeSize(final int cs) {
  258.         this.codeSize = cs;
  259.     }

  260.     protected void setPrefix(final int offset, final int value) {
  261.         prefixes[offset] = value;
  262.     }

  263.     protected void setTableSize(final int newSize) {
  264.         tableSize = newSize;
  265.     }

  266. }