/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
019package org.apache.commons.compress.compressors.z;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.lzw.LZWInputStream;
026
027/**
028 * Input stream that decompresses .Z files.
029 *
030 * @NotThreadSafe
031 * @since 1.7
032 */
033public class ZCompressorInputStream extends LZWInputStream {
034
035    private static final int MAGIC_1 = 0x1f;
036    private static final int MAGIC_2 = 0x9d;
037    private static final int BLOCK_MODE_MASK = 0x80;
038    private static final int MAX_CODE_SIZE_MASK = 0x1f;
039
040    /**
041     * Checks if the signature matches what is expected for a Unix compress file.
042     *
043     * @param signature the bytes to check
044     * @param length    the number of bytes to check
045     * @return true, if this stream is a Unix compress compressed stream, false otherwise
046     * @since 1.9
047     */
048    public static boolean matches(final byte[] signature, final int length) {
049        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
050    }
051
052    private final boolean blockMode;
053    private final int maxCodeSize;
054    private long totalCodesRead;
055
056    /**
057     * Constructs a new instance.
058     *
059     * @param inputStream The underlying input stream.
060     * @throws IOException if an I/O error occurs.
061     */
062    public ZCompressorInputStream(final InputStream inputStream) throws IOException {
063        this(inputStream, -1);
064    }
065
066    /**
067     * Constructs a new instance.
068     *
069     * @param inputStream The underlying input stream.
070     * @param memoryLimitInKiB maximum allowed estimated memory usage in kibibytes.
071     * @throws IOException if an I/O error occurs.
072     */
073    public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKiB) throws IOException {
074        super(inputStream, ByteOrder.LITTLE_ENDIAN);
075        final int firstByte = (int) in.readBits(8);
076        final int secondByte = (int) in.readBits(8);
077        final int thirdByte = (int) in.readBits(8);
078        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
079            throw new IOException("Input is not in .Z format");
080        }
081        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
082        maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
083        if (blockMode) {
084            setClearCode(DEFAULT_CODE_SIZE);
085        }
086        initializeTables(maxCodeSize, memoryLimitInKiB);
087        clearEntries();
088    }
089
090    /**
091     * {@inheritDoc}
092     * <p>
093     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
094     * warning.</strong>
095     * </p>
096     */
097    @Override
098    protected int addEntry(final int previousCode, final byte character) throws IOException {
099        final int maxTableSize = 1 << getCodeSize();
100        final int r = addEntry(previousCode, character, maxTableSize);
101        if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) {
102            reAlignReading();
103            incrementCodeSize();
104        }
105        return r;
106    }
107
108    private void clearEntries() {
109        setTableSize((1 << 8) + (blockMode ? 1 : 0));
110    }
111
112    /**
113     * {@inheritDoc}
114     * <p>
115     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
116     * warning.</strong>
117     * </p>
118     */
119    @Override
120    protected int decompressNextSymbol() throws IOException {
121        //
122        // table entry table entry
123        // _____________ _____
124        // table entry / \ / \
125        // ____________/ \ \
126        // / / \ / \ \
127        // +---+---+---+---+---+---+---+---+---+---+
128        // | . | . | . | . | . | . | . | . | . | . |
129        // +---+---+---+---+---+---+---+---+---+---+
130        // |<--------->|<------------->|<----->|<->|
131        // symbol symbol symbol symbol
132        //
133        final int code = readNextCode();
134        if (code < 0) {
135            return -1;
136        }
137        if (blockMode && code == getClearCode()) {
138            clearEntries();
139            reAlignReading();
140            resetCodeSize();
141            resetPreviousCode();
142            return 0;
143        }
144        boolean addedUnfinishedEntry = false;
145        if (code == getTableSize()) {
146            addRepeatOfPreviousCode();
147            addedUnfinishedEntry = true;
148        } else if (code > getTableSize()) {
149            throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
150        }
151        return expandCodeToOutputStack(code, addedUnfinishedEntry);
152    }
153
154    /**
155     * {@inheritDoc}
156     * <p>
157     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
158     * warning.</strong>
159     * </p>
160     */
161    @Override
162    protected int readNextCode() throws IOException {
163        final int code = super.readNextCode();
164        if (code >= 0) {
165            ++totalCodesRead;
166        }
167        return code;
168    }
169
170    private void reAlignReading() throws IOException {
171        // "compress" works in multiples of 8 symbols, each codeBits bits long.
172        // When codeBits changes, the remaining unused symbols in the current
173        // group of 8 are still written out, in the old codeSize,
174        // as garbage values (usually zeroes) that need to be skipped.
175        long codeReadsToThrowAway = 8 - totalCodesRead % 8;
176        if (codeReadsToThrowAway == 8) {
177            codeReadsToThrowAway = 0;
178        }
179        for (long i = 0; i < codeReadsToThrowAway; i++) {
180            readNextCode();
181        }
182        in.clearBitCache();
183    }
184
185}