/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19  package org.apache.commons.compress.compressors.z;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.nio.ByteOrder;
24  
25  import org.apache.commons.compress.compressors.lzw.LZWInputStream;
26  
27  /**
28   * Input stream that decompresses .Z files.
29   *
30   * @NotThreadSafe
31   * @since 1.7
32   */
33  public class ZCompressorInputStream extends LZWInputStream {
34      private static final int MAGIC_1 = 0x1f;
35      private static final int MAGIC_2 = 0x9d;
36      private static final int BLOCK_MODE_MASK = 0x80;
37      private static final int MAX_CODE_SIZE_MASK = 0x1f;
38  
39      /**
40       * Checks if the signature matches what is expected for a Unix compress file.
41       *
42       * @param signature the bytes to check
43       * @param length    the number of bytes to check
44       * @return true, if this stream is a Unix compress compressed stream, false otherwise
45       *
46       * @since 1.9
47       */
48      public static boolean matches(final byte[] signature, final int length) {
49          return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
50      }
51  
52      private final boolean blockMode;
53      private final int maxCodeSize;
54  
55      private long totalCodesRead;
56  
57      public ZCompressorInputStream(final InputStream inputStream) throws IOException {
58          this(inputStream, -1);
59      }
60  
61      public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) throws IOException {
62          super(inputStream, ByteOrder.LITTLE_ENDIAN);
63          final int firstByte = (int) in.readBits(8);
64          final int secondByte = (int) in.readBits(8);
65          final int thirdByte = (int) in.readBits(8);
66          if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
67              throw new IOException("Input is not in .Z format");
68          }
69          blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
70          maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
71          if (blockMode) {
72              setClearCode(DEFAULT_CODE_SIZE);
73          }
74          initializeTables(maxCodeSize, memoryLimitInKb);
75          clearEntries();
76      }
77  
78      /**
79       * {@inheritDoc}
80       * <p>
81       * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
82       * warning.</strong>
83       * </p>
84       */
85      @Override
86      protected int addEntry(final int previousCode, final byte character) throws IOException {
87          final int maxTableSize = 1 << getCodeSize();
88          final int r = addEntry(previousCode, character, maxTableSize);
89          if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
90              reAlignReading();
91              incrementCodeSize();
92          }
93          return r;
94      }
95  
96      private void clearEntries() {
97          setTableSize((1 << 8) + (blockMode ? 1 : 0));
98      }
99  
100     /**
101      * {@inheritDoc}
102      * <p>
103      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
104      * warning.</strong>
105      * </p>
106      */
107     @Override
108     protected int decompressNextSymbol() throws IOException {
109         //
110         // table entry table entry
111         // _____________ _____
112         // table entry / \ / \
113         // ____________/ \ \
114         // / / \ / \ \
115         // +---+---+---+---+---+---+---+---+---+---+
116         // | . | . | . | . | . | . | . | . | . | . |
117         // +---+---+---+---+---+---+---+---+---+---+
118         // |<--------->|<------------->|<----->|<->|
119         // symbol symbol symbol symbol
120         //
121         final int code = readNextCode();
122         if (code < 0) {
123             return -1;
124         }
125         if (blockMode && code == getClearCode()) {
126             clearEntries();
127             reAlignReading();
128             resetCodeSize();
129             resetPreviousCode();
130             return 0;
131         }
132         boolean addedUnfinishedEntry = false;
133         if (code == getTableSize()) {
134             addRepeatOfPreviousCode();
135             addedUnfinishedEntry = true;
136         } else if (code > getTableSize()) {
137             throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
138         }
139         return expandCodeToOutputStack(code, addedUnfinishedEntry);
140     }
141 
142     /**
143      * {@inheritDoc}
144      * <p>
145      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
146      * warning.</strong>
147      * </p>
148      */
149     @Override
150     protected int readNextCode() throws IOException {
151         final int code = super.readNextCode();
152         if (code >= 0) {
153             ++totalCodesRead;
154         }
155         return code;
156     }
157 
158     private void reAlignReading() throws IOException {
159         // "compress" works in multiples of 8 symbols, each codeBits bits long.
160         // When codeBits changes, the remaining unused symbols in the current
161         // group of 8 are still written out, in the old codeSize,
162         // as garbage values (usually zeroes) that need to be skipped.
163         long codeReadsToThrowAway = 8 - totalCodesRead % 8;
164         if (codeReadsToThrowAway == 8) {
165             codeReadsToThrowAway = 0;
166         }
167         for (long i = 0; i < codeReadsToThrowAway; i++) {
168             readNextCode();
169         }
170         in.clearBitCache();
171     }
172 
173 }