/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.z;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;

import org.apache.commons.compress.compressors.lzw.LZWInputStream;

/**
 * Input stream that decompresses .Z files.
 * <p>
 * The stream starts with a three byte header: two magic bytes followed by a flag byte whose highest bit selects block mode and whose lowest five bits give
 * the maximum code size.
 * </p>
 *
 * @NotThreadSafe
 * @since 1.7
 */
public class ZCompressorInputStream extends LZWInputStream {

    /** First magic byte of the header. */
    private static final int MAGIC_1 = 0x1f;

    /** Second magic byte of the header. */
    private static final int MAGIC_2 = 0x9d;

    /** Bit of the third header byte that signals block mode. */
    private static final int BLOCK_MODE_MASK = 0x80;

    /** Bits of the third header byte that hold the maximum code size. */
    private static final int MAX_CODE_SIZE_MASK = 0x1f;

    /**
     * Tests whether the given bytes start with the signature of a Unix compress file.
     * <p>
     * Only the two magic bytes are compared, but more than three bytes must be available for the signature to be accepted.
     * </p>
     *
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return true, if this stream is a Unix compress compressed stream, false otherwise
     * @since 1.9
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length <= 3) {
            return false;
        }
        return signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
    }

    /** Whether the header's block mode bit was set; enables handling of the clear code. */
    private final boolean blockMode;

    /** Upper bound for the code size as announced by the header. */
    private final int maxCodeSize;

    /** Number of codes successfully read so far, used to locate the padding at code size changes. */
    private long totalCodesRead;

    /**
     * Constructs a new instance, passing {@code -1} as the memory limit.
     *
     * @param inputStream The underlying input stream.
     * @throws IOException if an I/O error occurs.
     */
    public ZCompressorInputStream(final InputStream inputStream) throws IOException {
        this(inputStream, -1);
    }

    /**
     * Constructs a new instance, reading and checking the three byte header.
     *
     * @param inputStream      The underlying input stream.
     * @param memoryLimitInKiB maximum allowed estimated memory usage in kibibytes.
     * @throws IOException if an I/O error occurs or the input does not start with a .Z header.
     */
    public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKiB) throws IOException {
        super(inputStream, ByteOrder.LITTLE_ENDIAN);
        final int magic1 = (int) in.readBits(8);
        final int magic2 = (int) in.readBits(8);
        final int flags = (int) in.readBits(8);
        // A negative flag byte is rejected as well (presumably it signals that the input ended early).
        final boolean headerValid = magic1 == MAGIC_1 && magic2 == MAGIC_2 && flags >= 0;
        if (!headerValid) {
            throw new IOException("Input is not in .Z format");
        }
        blockMode = (flags & BLOCK_MODE_MASK) != 0;
        maxCodeSize = flags & MAX_CODE_SIZE_MASK;
        if (blockMode) {
            setClearCode(DEFAULT_CODE_SIZE);
        }
        initializeTables(maxCodeSize, memoryLimitInKiB);
        clearEntries();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Once the table has filled up for the current code size, and the code size may still grow, the padding codes are skipped and the code size is
     * incremented.
     * </p>
     * <p>
     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
     * warning.</strong>
     * </p>
     */
    @Override
    protected int addEntry(final int previousCode, final byte character) throws IOException {
        final int tableLimit = 1 << getCodeSize();
        final int result = addEntry(previousCode, character, tableLimit);
        final boolean tableFull = getTableSize() == tableLimit;
        if (tableFull && getCodeSize() < maxCodeSize) {
            reAlignReading();
            incrementCodeSize();
        }
        return result;
    }

    /**
     * Resets the table size to the 256 single byte entries, plus one more entry when block mode is active.
     */
    private void clearEntries() {
        final int literalEntries = 1 << 8;
        setTableSize(blockMode ? literalEntries + 1 : literalEntries);
    }

    /**
     * {@inheritDoc}
     * <p>
     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
     * warning.</strong>
     * </p>
     */
    @Override
    protected int decompressNextSymbol() throws IOException {
        //
        //                   table entry    table entry
        //                  _____________   _____
        //    table entry  /             \ /     \
        //    ____________/               \       \
        //   /           / \             / \       \
        //  +---+---+---+---+---+---+---+---+---+---+
        //  | . | . | . | . | . | . | . | . | . | . |
        //  +---+---+---+---+---+---+---+---+---+---+
        //  |<--------->|<------------->|<----->|<->|
        //     symbol        symbol      symbol  symbol
        //
        final int code = readNextCode();
        if (code < 0) {
            return -1;
        }
        if (blockMode && code == getClearCode()) {
            return restartAfterClearCode();
        }
        final int tableSize = getTableSize();
        if (code > tableSize) {
            throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code));
        }
        // A code equal to the current table size refers to the entry that is about to be created.
        final boolean addedUnfinishedEntry = code == tableSize;
        if (addedUnfinishedEntry) {
            addRepeatOfPreviousCode();
        }
        return expandCodeToOutputStack(code, addedUnfinishedEntry);
    }

    /**
     * Handles a clear code: resets the table size, skips the padding codes and resets code size and previous code.
     *
     * @return always 0, no bytes are produced by a clear code.
     * @throws IOException if skipping the padding codes fails.
     */
    private int restartAfterClearCode() throws IOException {
        clearEntries();
        reAlignReading();
        resetCodeSize();
        resetPreviousCode();
        return 0;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Every non-negative code returned is counted in order to be able to skip the padding codes later on.
     * </p>
     * <p>
     * <strong>This method is only protected for technical reasons and is not part of Commons Compress' published API. It may change or disappear without
     * warning.</strong>
     * </p>
     */
    @Override
    protected int readNextCode() throws IOException {
        final int code = super.readNextCode();
        if (code < 0) {
            return code;
        }
        totalCodesRead++;
        return code;
    }

    /**
     * Skips the codes that pad the current group of eight codes and clears the bit cache afterwards.
     *
     * @throws IOException if reading a padding code fails.
     */
    private void reAlignReading() throws IOException {
        // "compress" works in multiples of 8 symbols, each codeBits bits long.
        // When codeBits changes, the remaining unused symbols in the current
        // group of 8 are still written out, in the old codeSize,
        // as garbage values (usually zeroes) that need to be skipped.
        final long readInCurrentGroup = totalCodesRead % 8;
        // Nothing needs to be skipped when the current group of 8 is already complete.
        final long paddingCodes = readInCurrentGroup == 0 ? 0 : 8 - readInCurrentGroup;
        for (long remaining = paddingCodes; remaining > 0; remaining--) {
            readNextCode();
        }
        in.clearBitCache();
    }

}