View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.compressors.z;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.nio.ByteOrder;
24  
25  import org.apache.commons.compress.compressors.lzw.LZWInputStream;
26  
27  /**
28   * Input stream that decompresses .Z files.
29   *
30   * @NotThreadSafe
31   * @since 1.7
32   */
33  public class ZCompressorInputStream extends LZWInputStream {
34  
35      private static final int MAGIC_1 = 0x1f;
36      private static final int MAGIC_2 = 0x9d;
37      private static final int BLOCK_MODE_MASK = 0x80;
38      private static final int MAX_CODE_SIZE_MASK = 0x1f;
39  
40      /**
41       * Checks if the signature matches what is expected for a Unix compress file.
42       *
43       * @param signature the bytes to check
44       * @param length    the number of bytes to check
45       * @return true, if this stream is a Unix compress compressed stream, false otherwise
46       * @since 1.9
47       */
48      public static boolean matches(final byte[] signature, final int length) {
49          return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
50      }
51  
52      private final boolean blockMode;
53      private final int maxCodeSize;
54      private long totalCodesRead;
55  
56      /**
57       * Constructs a new instance.
58       *
59       * @param inputStream The underlying input stream.
60       * @throws IOException if an I/O error occurs.
61       */
62      public ZCompressorInputStream(final InputStream inputStream) throws IOException {
63          this(inputStream, -1);
64      }
65  
66      /**
67       * Constructs a new instance.
68       *
69       * @param inputStream The underlying input stream.
70       * @param memoryLimitInKiB maximum allowed estimated memory usage in kibibytes.
71       * @throws IOException if an I/O error occurs.
72       */
73      public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKiB) throws IOException {
74          super(inputStream, ByteOrder.LITTLE_ENDIAN);
75          final int firstByte = (int) in.readBits(8);
76          final int secondByte = (int) in.readBits(8);
77          final int thirdByte = (int) in.readBits(8);
78          if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
79              throw new IOException("Input is not in .Z format");
80          }
81          blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
82          maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
83          if (blockMode) {
84              setClearCode(DEFAULT_CODE_SIZE);
85          }
86          initializeTables(maxCodeSize, memoryLimitInKiB);
87          clearEntries();
88      }
89  
90      /**
91       * {@inheritDoc}
92       * <p>
93       * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
94       * warning.</strong>
95       * </p>
96       */
97      @Override
98      protected int addEntry(final int previousCode, final byte character) throws IOException {
99          final int maxTableSize = 1 << getCodeSize();
100         final int r = addEntry(previousCode, character, maxTableSize);
101         if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
102             reAlignReading();
103             incrementCodeSize();
104         }
105         return r;
106     }
107 
108     private void clearEntries() {
109         setTableSize((1 << 8) + (blockMode ? 1 : 0));
110     }
111 
112     /**
113      * {@inheritDoc}
114      * <p>
115      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
116      * warning.</strong>
117      * </p>
118      */
119     @Override
120     protected int decompressNextSymbol() throws IOException {
121         //
122         // table entry table entry
123         // _____________ _____
124         // table entry / \ / \
125         // ____________/ \ \
126         // / / \ / \ \
127         // +---+---+---+---+---+---+---+---+---+---+
128         // | . | . | . | . | . | . | . | . | . | . |
129         // +---+---+---+---+---+---+---+---+---+---+
130         // |<--------->|<------------->|<----->|<->|
131         // symbol symbol symbol symbol
132         //
133         final int code = readNextCode();
134         if (code < 0) {
135             return -1;
136         }
137         if (blockMode && code == getClearCode()) {
138             clearEntries();
139             reAlignReading();
140             resetCodeSize();
141             resetPreviousCode();
142             return 0;
143         }
144         boolean addedUnfinishedEntry = false;
145         if (code == getTableSize()) {
146             addRepeatOfPreviousCode();
147             addedUnfinishedEntry = true;
148         } else if (code > getTableSize()) {
149             throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
150         }
151         return expandCodeToOutputStack(code, addedUnfinishedEntry);
152     }
153 
154     /**
155      * {@inheritDoc}
156      * <p>
157      * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
158      * warning.</strong>
159      * </p>
160      */
161     @Override
162     protected int readNextCode() throws IOException {
163         final int code = super.readNextCode();
164         if (code >= 0) {
165             ++totalCodesRead;
166         }
167         return code;
168     }
169 
170     private void reAlignReading() throws IOException {
171         // "compress" works in multiples of 8 symbols, each codeBits bits long.
172         // When codeBits changes, the remaining unused symbols in the current
173         // group of 8 are still written out, in the old codeSize,
174         // as garbage values (usually zeroes) that need to be skipped.
175         long codeReadsToThrowAway = 8 - totalCodesRead % 8;
176         if (codeReadsToThrowAway == 8) {
177             codeReadsToThrowAway = 0;
178         }
179         for (long i = 0; i < codeReadsToThrowAway; i++) {
180             readNextCode();
181         }
182         in.clearBitCache();
183     }
184 
185 }