001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.lzw; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.MemoryLimitException; 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BitInputStream; 028import org.apache.commons.compress.utils.InputStreamStatistics; 029 030/** 031 * <p> 032 * Generic LZW implementation. It is used internally for the Z decompressor and the Unshrinking Zip file compression method, but may be useful for third-party 033 * projects in implementing their own LZW variations. 034 * </p> 035 * 036 * @NotThreadSafe 037 * @since 1.10 038 */ 039public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics { 040 041 /** 042 * Maximum code size in bits. 043 */ 044 private static final int MAX_CODE_SIZE = 31; 045 046 /** 047 * Default code size in bits. 048 */ 049 protected static final int DEFAULT_CODE_SIZE = 9; 050 051 /** 052 * Unused marker. 053 */ 054 protected static final int UNUSED_PREFIX = -1; 055 056 private final byte[] oneByte = new byte[1]; 057 058 /** 059 * Input. 060 */ 061 protected final BitInputStream in; 062 private int clearCode = -1; 063 private int codeSize = DEFAULT_CODE_SIZE; 064 private byte previousCodeFirstChar; 065 private int previousCode = UNUSED_PREFIX; 066 private int tableSize; 067 private int[] prefixes; 068 private byte[] characters; 069 private byte[] outputStack; 070 private int outputStackLocation; 071 072 /** 073 * Constructs a new instance. 074 * 075 * @param inputStream The underlying input stream. 076 * @param byteOrder the input byte order. 077 */ 078 protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { 079 this.in = new BitInputStream(inputStream, byteOrder); 080 } 081 082 /** 083 * Add a new entry to the dictionary. 084 * 085 * @param previousCode the previous code 086 * @param character the next character to append 087 * @return the new code 088 * @throws IOException on error 089 */ 090 protected abstract int addEntry(int previousCode, byte character) throws IOException; 091 092 /** 093 * Adds a new entry if the maximum table size hasn't been exceeded and returns the new index. 094 * 095 * @param previousCode the previous code 096 * @param character the character to append 097 * @param maxTableSize the maximum table size 098 * @return the new code or -1 if maxTableSize has been reached already 099 */ 100 protected int addEntry(final int previousCode, final byte character, final int maxTableSize) { 101 if (tableSize < maxTableSize) { 102 prefixes[tableSize] = previousCode; 103 characters[tableSize] = character; 104 return tableSize++; 105 } 106 return -1; 107 } 108 109 /** 110 * Add entry for repeat of previousCode we haven't added, yet. 111 * 112 * @return new code for a repeat of the previous code or -1 if maxTableSize has been reached already 113 * @throws IOException on error 114 */ 115 protected int addRepeatOfPreviousCode() throws IOException { 116 if (previousCode == -1) { 117 // can't have a repeat for the very first code 118 throw new IOException("The first code can't be a reference to its preceding code"); 119 } 120 return addEntry(previousCode, previousCodeFirstChar); 121 } 122 123 @Override 124 public void close() throws IOException { 125 in.close(); 126 } 127 128 /** 129 * Reads the next code and expand it. 130 * 131 * @return the expanded next code, negative on EOF 132 * @throws IOException on error 133 */ 134 protected abstract int decompressNextSymbol() throws IOException; 135 136 /** 137 * Expands the entry with index code to the output stack and may create a new entry. 138 * 139 * @param code the code. 140 * @param addedUnfinishedEntry whether unfinished entries have been added. 141 * @return the new location of the output stack. 142 * @throws IOException if an I/O error occurs. 143 */ 144 protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry) throws IOException { 145 for (int entry = code; entry >= 0; entry = prefixes[entry]) { 146 outputStack[--outputStackLocation] = characters[entry]; 147 } 148 if (previousCode != -1 && !addedUnfinishedEntry) { 149 addEntry(previousCode, outputStack[outputStackLocation]); 150 } 151 previousCode = code; 152 previousCodeFirstChar = outputStack[outputStackLocation]; 153 return outputStackLocation; 154 } 155 156 /** 157 * Gets the clear code. 158 * 159 * @return the clear code. 160 */ 161 protected int getClearCode() { 162 return clearCode; 163 } 164 165 /** 166 * Gets the code size in bits. 167 * 168 * @return the code size in bits. 169 */ 170 protected int getCodeSize() { 171 return codeSize; 172 } 173 174 /** 175 * @since 1.17 176 */ 177 @Override 178 public long getCompressedCount() { 179 return in.getBytesRead(); 180 } 181 182 /** 183 * Gets the prefix at the given offset. 184 * 185 * @param offset offset to query. 186 * @return the prefix at the given offset. 187 */ 188 protected int getPrefix(final int offset) { 189 return prefixes[offset]; 190 } 191 192 /** 193 * Gets the prefixes length. 194 * 195 * @return the prefixes length. 196 */ 197 protected int getPrefixesLength() { 198 return prefixes.length; 199 } 200 201 /** 202 * Gets the table size. 203 * 204 * @return the table size. 205 */ 206 protected int getTableSize() { 207 return tableSize; 208 } 209 210 /** 211 * Increments the code size by one. 212 */ 213 protected void incrementCodeSize() { 214 codeSize++; 215 } 216 217 /** 218 * Initializes the arrays based on the maximum code size. 219 * 220 * @param maxCodeSize maximum code size 221 * @throws IllegalArgumentException if {@code maxCodeSize} is out of bounds for {@code prefixes} and {@code characters}. 222 */ 223 protected void initializeTables(final int maxCodeSize) { 224 // maxCodeSize shifted cannot be less than 256, otherwise the loop in initializeTables() will throw an ArrayIndexOutOfBoundsException 225 // maxCodeSize cannot be smaller than getCodeSize(), otherwise addEntry() will throw an ArrayIndexOutOfBoundsException 226 if (1 << maxCodeSize < 256 || getCodeSize() > maxCodeSize) { 227 // TODO test against prefixes.length and characters.length? 228 throw new IllegalArgumentException("maxCodeSize " + maxCodeSize + " is out of bounds."); 229 } 230 final int maxTableSize = 1 << maxCodeSize; 231 prefixes = new int[maxTableSize]; 232 characters = new byte[maxTableSize]; 233 outputStack = new byte[maxTableSize]; 234 outputStackLocation = maxTableSize; 235 final int max = 1 << 8; 236 for (int i = 0; i < max; i++) { 237 prefixes[i] = -1; 238 characters[i] = (byte) i; 239 } 240 } 241 242 /** 243 * Initializes the arrays based on the maximum code size. First checks that the estimated memory usage is below memoryLimitInKb 244 * 245 * @param maxCodeSize maximum code size 246 * @param memoryLimiKiB maximum allowed estimated memory usage in kibibytes (KiB). 247 * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitKiB 248 * @throws IllegalArgumentException if {@code maxCodeSize} is not bigger than 0 249 */ 250 protected void initializeTables(final int maxCodeSize, final int memoryLimiKiB) throws MemoryLimitException { 251 if (maxCodeSize <= 0) { 252 throw new IllegalArgumentException("maxCodeSize is " + maxCodeSize + ", must be bigger than 0"); 253 } 254 255 if (memoryLimiKiB > -1) { 256 final int maxTableSize = 1 << maxCodeSize; 257 // account for potential overflow 258 final long memoryUsageBytes = (long) maxTableSize * 6; // (4 (prefixes) + 1 (characters) +1 (outputStack)) 259 final long memoryUsageKiB = memoryUsageBytes >> 10; 260 261 if (memoryUsageKiB > memoryLimiKiB) { 262 throw new MemoryLimitException(memoryUsageKiB, memoryLimiKiB); 263 } 264 } 265 initializeTables(maxCodeSize); 266 } 267 268 @Override 269 public int read() throws IOException { 270 final int ret = read(oneByte); 271 if (ret < 0) { 272 return ret; 273 } 274 return 0xff & oneByte[0]; 275 } 276 277 @Override 278 public int read(final byte[] b, final int off, final int len) throws IOException { 279 if (len == 0) { 280 return 0; 281 } 282 int bytesRead = readFromStack(b, off, len); 283 while (len - bytesRead > 0) { 284 final int result = decompressNextSymbol(); 285 if (result < 0) { 286 if (bytesRead > 0) { 287 count(bytesRead); 288 return bytesRead; 289 } 290 return result; 291 } 292 bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); 293 } 294 count(bytesRead); 295 return bytesRead; 296 } 297 298 private int readFromStack(final byte[] b, final int off, final int len) { 299 final int remainingInStack = outputStack.length - outputStackLocation; 300 if (remainingInStack > 0) { 301 final int maxLength = Math.min(remainingInStack, len); 302 System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); 303 outputStackLocation += maxLength; 304 return maxLength; 305 } 306 return 0; 307 } 308 309 /** 310 * Reads the next code from the stream. 311 * 312 * @return the next code 313 * @throws IOException on error 314 */ 315 protected int readNextCode() throws IOException { 316 if (codeSize > MAX_CODE_SIZE) { 317 throw new IllegalArgumentException("Code size must not be bigger than 31"); 318 } 319 return (int) in.readBits(codeSize); 320 } 321 322 /** 323 * Resets the code size to its default value. 324 */ 325 protected void resetCodeSize() { 326 setCodeSize(DEFAULT_CODE_SIZE); 327 } 328 329 /** 330 * Resets the previous code to its default value. 331 */ 332 protected void resetPreviousCode() { 333 this.previousCode = -1; 334 } 335 336 /** 337 * Sets the clear code based on the code size. 338 * 339 * @param codeSize code size 340 */ 341 protected void setClearCode(final int codeSize) { 342 clearCode = 1 << codeSize - 1; 343 } 344 345 /** 346 * Sets the code size in bits. 347 * 348 * @param codeSize the code size in bits. 349 */ 350 protected void setCodeSize(final int codeSize) { 351 this.codeSize = codeSize; 352 } 353 354 /** 355 * Sets the prefix at the given offset. 356 * 357 * @param offset the target offset. 358 * @param value the new value. 359 */ 360 protected void setPrefix(final int offset, final int value) { 361 prefixes[offset] = value; 362 } 363 364 /** 365 * Sets the table size. 366 * 367 * @param tableSize the new table size. 368 */ 369 protected void setTableSize(final int tableSize) { 370 this.tableSize = tableSize; 371 } 372 373}