1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.commons.compress.compressors.z;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.nio.ByteOrder;
24
25 import org.apache.commons.compress.compressors.lzw.LZWInputStream;
26
27 /**
28 * Input stream that decompresses .Z files.
29 *
30 * @NotThreadSafe
31 * @since 1.7
32 */
33 public class ZCompressorInputStream extends LZWInputStream {
34
35 private static final int MAGIC_1 = 0x1f;
36 private static final int MAGIC_2 = 0x9d;
37 private static final int BLOCK_MODE_MASK = 0x80;
38 private static final int MAX_CODE_SIZE_MASK = 0x1f;
39
40 /**
41 * Checks if the signature matches what is expected for a Unix compress file.
42 *
43 * @param signature the bytes to check
44 * @param length the number of bytes to check
45 * @return true, if this stream is a Unix compress compressed stream, false otherwise
46 * @since 1.9
47 */
48 public static boolean matches(final byte[] signature, final int length) {
49 return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
50 }
51
52 private final boolean blockMode;
53 private final int maxCodeSize;
54 private long totalCodesRead;
55
56 /**
57 * Constructs a new instance.
58 *
59 * @param inputStream The underlying input stream.
60 * @throws IOException if an I/O error occurs.
61 */
62 public ZCompressorInputStream(final InputStream inputStream) throws IOException {
63 this(inputStream, -1);
64 }
65
66 /**
67 * Constructs a new instance.
68 *
69 * @param inputStream The underlying input stream.
70 * @param memoryLimitInKiB maximum allowed estimated memory usage in kibibytes.
71 * @throws IOException if an I/O error occurs.
72 */
73 public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKiB) throws IOException {
74 super(inputStream, ByteOrder.LITTLE_ENDIAN);
75 final int firstByte = (int) in.readBits(8);
76 final int secondByte = (int) in.readBits(8);
77 final int thirdByte = (int) in.readBits(8);
78 if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
79 throw new IOException("Input is not in .Z format");
80 }
81 blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
82 maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
83 if (blockMode) {
84 setClearCode(DEFAULT_CODE_SIZE);
85 }
86 initializeTables(maxCodeSize, memoryLimitInKiB);
87 clearEntries();
88 }
89
90 /**
91 * {@inheritDoc}
92 * <p>
93 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
94 * warning.</strong>
95 * </p>
96 */
97 @Override
98 protected int addEntry(final int previousCode, final byte character) throws IOException {
99 final int maxTableSize = 1 << getCodeSize();
100 final int r = addEntry(previousCode, character, maxTableSize);
101 if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
102 reAlignReading();
103 incrementCodeSize();
104 }
105 return r;
106 }
107
108 private void clearEntries() {
109 setTableSize((1 << 8) + (blockMode ? 1 : 0));
110 }
111
112 /**
113 * {@inheritDoc}
114 * <p>
115 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
116 * warning.</strong>
117 * </p>
118 */
119 @Override
120 protected int decompressNextSymbol() throws IOException {
121 //
122 // table entry table entry
123 // _____________ _____
124 // table entry / \ / \
125 // ____________/ \ \
126 // / / \ / \ \
127 // +---+---+---+---+---+---+---+---+---+---+
128 // | . | . | . | . | . | . | . | . | . | . |
129 // +---+---+---+---+---+---+---+---+---+---+
130 // |<--------->|<------------->|<----->|<->|
131 // symbol symbol symbol symbol
132 //
133 final int code = readNextCode();
134 if (code < 0) {
135 return -1;
136 }
137 if (blockMode && code == getClearCode()) {
138 clearEntries();
139 reAlignReading();
140 resetCodeSize();
141 resetPreviousCode();
142 return 0;
143 }
144 boolean addedUnfinishedEntry = false;
145 if (code == getTableSize()) {
146 addRepeatOfPreviousCode();
147 addedUnfinishedEntry = true;
148 } else if (code > getTableSize()) {
149 throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
150 }
151 return expandCodeToOutputStack(code, addedUnfinishedEntry);
152 }
153
154 /**
155 * {@inheritDoc}
156 * <p>
157 * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
158 * warning.</strong>
159 * </p>
160 */
161 @Override
162 protected int readNextCode() throws IOException {
163 final int code = super.readNextCode();
164 if (code >= 0) {
165 ++totalCodesRead;
166 }
167 return code;
168 }
169
170 private void reAlignReading() throws IOException {
171 // "compress" works in multiples of 8 symbols, each codeBits bits long.
172 // When codeBits changes, the remaining unused symbols in the current
173 // group of 8 are still written out, in the old codeSize,
174 // as garbage values (usually zeroes) that need to be skipped.
175 long codeReadsToThrowAway = 8 - totalCodesRead % 8;
176 if (codeReadsToThrowAway == 8) {
177 codeReadsToThrowAway = 0;
178 }
179 for (long i = 0; i < codeReadsToThrowAway; i++) {
180 readNextCode();
181 }
182 in.clearBitCache();
183 }
184
185 }