ZCompressorInputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.compress.compressors.z;

  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.nio.ByteOrder;

  23. import org.apache.commons.compress.compressors.lzw.LZWInputStream;

  24. /**
  25.  * Input stream that decompresses .Z files.
  26.  *
  27.  * @NotThreadSafe
  28.  * @since 1.7
  29.  */
  30. public class ZCompressorInputStream extends LZWInputStream {
  31.     private static final int MAGIC_1 = 0x1f;
  32.     private static final int MAGIC_2 = 0x9d;
  33.     private static final int BLOCK_MODE_MASK = 0x80;
  34.     private static final int MAX_CODE_SIZE_MASK = 0x1f;

  35.     /**
  36.      * Checks if the signature matches what is expected for a UNIX compress file.
  37.      *
  38.      * @param signature the bytes to check
  39.      * @param length    the number of bytes to check
  40.      * @return true, if this stream is a UNIX compress compressed stream, false otherwise
  41.      *
  42.      * @since 1.9
  43.      */
  44.     public static boolean matches(final byte[] signature, final int length) {
  45.         return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
  46.     }

  47.     private final boolean blockMode;
  48.     private final int maxCodeSize;

  49.     private long totalCodesRead;

  50.     public ZCompressorInputStream(final InputStream inputStream) throws IOException {
  51.         this(inputStream, -1);
  52.     }

  53.     public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) throws IOException {
  54.         super(inputStream, ByteOrder.LITTLE_ENDIAN);
  55.         final int firstByte = (int) in.readBits(8);
  56.         final int secondByte = (int) in.readBits(8);
  57.         final int thirdByte = (int) in.readBits(8);
  58.         if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
  59.             throw new IOException("Input is not in .Z format");
  60.         }
  61.         blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
  62.         maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
  63.         if (blockMode) {
  64.             setClearCode(DEFAULT_CODE_SIZE);
  65.         }
  66.         initializeTables(maxCodeSize, memoryLimitInKb);
  67.         clearEntries();
  68.     }

  69.     /**
  70.      * {@inheritDoc}
  71.      * <p>
  72.      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
  73.      * warning.</strong>
  74.      * </p>
  75.      */
  76.     @Override
  77.     protected int addEntry(final int previousCode, final byte character) throws IOException {
  78.         final int maxTableSize = 1 << getCodeSize();
  79.         final int r = addEntry(previousCode, character, maxTableSize);
  80.         if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
  81.             reAlignReading();
  82.             incrementCodeSize();
  83.         }
  84.         return r;
  85.     }

  86.     private void clearEntries() {
  87.         setTableSize((1 << 8) + (blockMode ? 1 : 0));
  88.     }

  89.     /**
  90.      * {@inheritDoc}
  91.      * <p>
  92.      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
  93.      * warning.</strong>
  94.      * </p>
  95.      */
  96.     @Override
  97.     protected int decompressNextSymbol() throws IOException {
  98.         //
  99.         // table entry table entry
  100.         // _____________ _____
  101.         // table entry / \ / \
  102.         // ____________/ \ \
  103.         // / / \ / \ \
  104.         // +---+---+---+---+---+---+---+---+---+---+
  105.         // | . | . | . | . | . | . | . | . | . | . |
  106.         // +---+---+---+---+---+---+---+---+---+---+
  107.         // |<--------->|<------------->|<----->|<->|
  108.         // symbol symbol symbol symbol
  109.         //
  110.         final int code = readNextCode();
  111.         if (code < 0) {
  112.             return -1;
  113.         }
  114.         if (blockMode && code == getClearCode()) {
  115.             clearEntries();
  116.             reAlignReading();
  117.             resetCodeSize();
  118.             resetPreviousCode();
  119.             return 0;
  120.         }
  121.         boolean addedUnfinishedEntry = false;
  122.         if (code == getTableSize()) {
  123.             addRepeatOfPreviousCode();
  124.             addedUnfinishedEntry = true;
  125.         } else if (code > getTableSize()) {
  126.             throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
  127.         }
  128.         return expandCodeToOutputStack(code, addedUnfinishedEntry);
  129.     }

  130.     /**
  131.      * {@inheritDoc}
  132.      * <p>
  133.      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
  134.      * warning.</strong>
  135.      * </p>
  136.      */
  137.     @Override
  138.     protected int readNextCode() throws IOException {
  139.         final int code = super.readNextCode();
  140.         if (code >= 0) {
  141.             ++totalCodesRead;
  142.         }
  143.         return code;
  144.     }

  145.     private void reAlignReading() throws IOException {
  146.         // "compress" works in multiples of 8 symbols, each codeBits bits long.
  147.         // When codeBits changes, the remaining unused symbols in the current
  148.         // group of 8 are still written out, in the old codeSize,
  149.         // as garbage values (usually zeroes) that need to be skipped.
  150.         long codeReadsToThrowAway = 8 - totalCodesRead % 8;
  151.         if (codeReadsToThrowAway == 8) {
  152.             codeReadsToThrowAway = 0;
  153.         }
  154.         for (long i = 0; i < codeReadsToThrowAway; i++) {
  155.             readNextCode();
  156.         }
  157.         in.clearBitCache();
  158.     }

  159. }