001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.harmony.pack200; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.io.InputStream; 024import java.util.Arrays; 025import java.util.HashMap; 026import java.util.Map; 027 028/** 029 * CodecEncoding is used to get the right Codec for a given meta-encoding. 030 */ 031public class CodecEncoding { 032 033 private static final int[] EMPTY_INT_ARRAY = {}; 034 035 /** 036 * The canonical encodings are defined to allow a single byte to represent one of the standard encodings. The following values are defined in the Pack200 037 * specification, and this array cannot be changed. 038 */ 039 private static final BHSDCodec[] canonicalCodec = { null, new BHSDCodec(1, 256), new BHSDCodec(1, 256, 1), new BHSDCodec(1, 256, 0, 1), 040 new BHSDCodec(1, 256, 1, 1), new BHSDCodec(2, 256), new BHSDCodec(2, 256, 1), new BHSDCodec(2, 256, 0, 1), new BHSDCodec(2, 256, 1, 1), 041 new BHSDCodec(3, 256), new BHSDCodec(3, 256, 1), new BHSDCodec(3, 256, 0, 1), new BHSDCodec(3, 256, 1, 1), new BHSDCodec(4, 256), 042 new BHSDCodec(4, 256, 1), new BHSDCodec(4, 256, 0, 1), new BHSDCodec(4, 256, 1, 1), new BHSDCodec(5, 4), new BHSDCodec(5, 4, 1), 043 new BHSDCodec(5, 4, 2), new BHSDCodec(5, 16), new BHSDCodec(5, 16, 1), new BHSDCodec(5, 16, 2), new BHSDCodec(5, 32), new BHSDCodec(5, 32, 1), 044 new BHSDCodec(5, 32, 2), new BHSDCodec(5, 64), new BHSDCodec(5, 64, 1), new BHSDCodec(5, 64, 2), new BHSDCodec(5, 128), new BHSDCodec(5, 128, 1), 045 new BHSDCodec(5, 128, 2), new BHSDCodec(5, 4, 0, 1), new BHSDCodec(5, 4, 1, 1), new BHSDCodec(5, 4, 2, 1), new BHSDCodec(5, 16, 0, 1), 046 new BHSDCodec(5, 16, 1, 1), new BHSDCodec(5, 16, 2, 1), new BHSDCodec(5, 32, 0, 1), new BHSDCodec(5, 32, 1, 1), new BHSDCodec(5, 32, 2, 1), 047 new BHSDCodec(5, 64, 0, 1), new BHSDCodec(5, 64, 1, 1), new BHSDCodec(5, 64, 2, 1), new BHSDCodec(5, 128, 0, 1), new BHSDCodec(5, 128, 1, 1), 048 new BHSDCodec(5, 128, 2, 1), new BHSDCodec(2, 192), new BHSDCodec(2, 224), new BHSDCodec(2, 240), new BHSDCodec(2, 248), new BHSDCodec(2, 252), 049 new BHSDCodec(2, 8, 0, 1), new BHSDCodec(2, 8, 1, 1), new BHSDCodec(2, 16, 0, 1), new BHSDCodec(2, 16, 1, 1), new BHSDCodec(2, 32, 0, 1), 050 new BHSDCodec(2, 32, 1, 1), new BHSDCodec(2, 64, 0, 1), new BHSDCodec(2, 64, 1, 1), new BHSDCodec(2, 128, 0, 1), new BHSDCodec(2, 128, 1, 1), 051 new BHSDCodec(2, 192, 0, 1), new BHSDCodec(2, 192, 1, 1), new BHSDCodec(2, 224, 0, 1), new BHSDCodec(2, 224, 1, 1), new BHSDCodec(2, 240, 0, 1), 052 new BHSDCodec(2, 240, 1, 1), new BHSDCodec(2, 248, 0, 1), new BHSDCodec(2, 248, 1, 1), new BHSDCodec(3, 192), new BHSDCodec(3, 224), 053 new BHSDCodec(3, 240), new BHSDCodec(3, 248), new BHSDCodec(3, 252), new BHSDCodec(3, 8, 0, 1), new BHSDCodec(3, 8, 1, 1), 054 new BHSDCodec(3, 16, 0, 1), new BHSDCodec(3, 16, 1, 1), new BHSDCodec(3, 32, 0, 1), new BHSDCodec(3, 32, 1, 1), new BHSDCodec(3, 64, 0, 1), 055 new BHSDCodec(3, 64, 1, 1), new BHSDCodec(3, 128, 0, 1), new BHSDCodec(3, 128, 1, 1), new BHSDCodec(3, 192, 0, 1), new BHSDCodec(3, 192, 1, 1), 056 new BHSDCodec(3, 224, 0, 1), new BHSDCodec(3, 224, 1, 1), new BHSDCodec(3, 240, 0, 1), new BHSDCodec(3, 240, 1, 1), new BHSDCodec(3, 248, 0, 1), 057 new BHSDCodec(3, 248, 1, 1), new BHSDCodec(4, 192), new BHSDCodec(4, 224), new BHSDCodec(4, 240), new BHSDCodec(4, 248), new BHSDCodec(4, 252), 058 new BHSDCodec(4, 8, 0, 1), new BHSDCodec(4, 8, 1, 1), new BHSDCodec(4, 16, 0, 1), new BHSDCodec(4, 16, 1, 1), new BHSDCodec(4, 32, 0, 1), 059 new BHSDCodec(4, 32, 1, 1), new BHSDCodec(4, 64, 0, 1), new BHSDCodec(4, 64, 1, 1), new BHSDCodec(4, 128, 0, 1), new BHSDCodec(4, 128, 1, 1), 060 new BHSDCodec(4, 192, 0, 1), new BHSDCodec(4, 192, 1, 1), new BHSDCodec(4, 224, 0, 1), new BHSDCodec(4, 224, 1, 1), new BHSDCodec(4, 240, 0, 1), 061 new BHSDCodec(4, 240, 1, 1), new BHSDCodec(4, 248, 0, 1), new BHSDCodec(4, 248, 1, 1) }; 062 063 private static Map<BHSDCodec, Integer> canonicalCodecsToSpecifiers; 064 065 static { 066 final HashMap<BHSDCodec, Integer> reverseMap = new HashMap<>(canonicalCodec.length); 067 for (int i = 0; i < canonicalCodec.length; i++) { 068 reverseMap.put(canonicalCodec[i], Integer.valueOf(i)); 069 } 070 canonicalCodecsToSpecifiers = reverseMap; 071 } 072 073 public static BHSDCodec getCanonicalCodec(final int i) { 074 return canonicalCodec[i]; 075 } 076 077 /** 078 * Gets the codec specified by the given value byte and optional byte header. If the value is >= 116, then bytes may be consumed from the secondary 079 * input stream, which is taken to be the contents of the band_headers byte array. Since the values from this are consumed and not repeated, the input 080 * stream should be reused for subsequent encodings. This does not therefore close the input stream. 081 * 082 * @param value the canonical encoding value 083 * @param in the input stream to read additional byte headers from 084 * @param defaultCodec TODO 085 * @return the corresponding codec, or {@code null} if the default should be used 086 * @throws IOException if there is a problem reading from the input stream (which in reality, is never, since the band_headers are likely stored in a 087 * byte array and accessed via a ByteArrayInputStream. However, an EOFException could occur if things go wrong) 088 * @throws Pack200Exception TODO 089 */ 090 public static Codec getCodec(final int value, final InputStream in, final Codec defaultCodec) throws IOException, Pack200Exception { 091 // Sanity check to make sure that no-one has changed 092 // the canonical codecs, which would really cause havoc 093 if (canonicalCodec.length != 116) { 094 throw new Error("Canonical encodings have been incorrectly modified"); 095 } 096 if (value < 0) { 097 throw new IllegalArgumentException("Encoding cannot be less than zero"); 098 } 099 if (value == 0) { 100 return defaultCodec; 101 } 102 if (value <= 115) { 103 return canonicalCodec[value]; 104 } 105 if (value == 116) { 106 int code = in.read(); 107 if (code == -1) { 108 throw new EOFException("End of buffer read whilst trying to decode codec"); 109 } 110 final int d = code & 0x01; 111 final int s = code >> 1 & 0x03; 112 final int b = (code >> 3 & 0x07) + 1; // this might result in an invalid 113 // number, but it's checked in the 114 // Codec constructor 115 code = in.read(); 116 if (code == -1) { 117 throw new EOFException("End of buffer read whilst trying to decode codec"); 118 } 119 final int h = code + 1; 120 // This handles the special cases for invalid combinations of data. 121 return new BHSDCodec(b, h, s, d); 122 } 123 if (value >= 117 && value <= 140) { // Run codec 124 final int offset = value - 117; 125 final int kx = offset & 3; 126 final boolean kbflag = (offset >> 2 & 1) == 1; 127 final boolean adef = (offset >> 3 & 1) == 1; 128 final boolean bdef = (offset >> 4 & 1) == 1; 129 // If both A and B use the default encoding, what's the point of 130 // having a run of default values followed by default values 131 if (adef && bdef) { 132 throw new Pack200Exception("ADef and BDef should never both be true"); 133 } 134 final int kb = kbflag ? in.read() : 3; 135 final int k = (kb + 1) * (int) Math.pow(16, kx); 136 final Codec aCodec; 137 final Codec bCodec; 138 if (adef) { 139 aCodec = defaultCodec; 140 } else { 141 aCodec = getCodec(in.read(), in, defaultCodec); 142 } 143 if (bdef) { 144 bCodec = defaultCodec; 145 } else { 146 bCodec = getCodec(in.read(), in, defaultCodec); 147 } 148 return new RunCodec(k, aCodec, bCodec); 149 } 150 if (value < 141 || value > 188) { 151 throw new Pack200Exception("Invalid codec encoding byte (" + value + ") found"); 152 } 153 final int offset = value - 141; 154 final boolean fdef = (offset & 1) == 1; 155 final boolean udef = (offset >> 1 & 1) == 1; 156 final int tdefl = offset >> 2; 157 final boolean tdef = tdefl != 0; 158 // From section 6.7.3 of spec 159 final int[] tdefToL = { 0, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; 160 final int l = tdefToL[tdefl]; 161 // NOTE: Do not re-factor this to bring out uCodec; the order in 162 // which 163 // they are read from the stream is important 164 if (tdef) { 165 final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 166 final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 167 // Unfortunately, if tdef, then tCodec depends both on l and 168 // also on k, the 169 // number of items read from the fCodec. So we don't know in 170 // advance what 171 // the codec will be. 172 return new PopulationCodec(fCodec, l, uCodec); 173 } 174 final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 175 final Codec tCodec = getCodec(in.read(), in, defaultCodec); 176 final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec); 177 return new PopulationCodec(fCodec, tCodec, uCodec); 178 } 179 180 public static int[] getSpecifier(final Codec codec, final Codec defaultForBand) { 181 if (canonicalCodecsToSpecifiers.containsKey(codec)) { 182 return new int[] { canonicalCodecsToSpecifiers.get(codec).intValue() }; 183 } 184 if (codec instanceof BHSDCodec) { 185 // Cache these? 186 final BHSDCodec bhsdCodec = (BHSDCodec) codec; 187 final int[] specifiers = new int[3]; 188 specifiers[0] = 116; 189 specifiers[1] = (bhsdCodec.isDelta() ? 1 : 0) + 2 * bhsdCodec.getS() + 8 * (bhsdCodec.getB() - 1); 190 specifiers[2] = bhsdCodec.getH() - 1; 191 return specifiers; 192 } 193 if (codec instanceof RunCodec) { 194 final RunCodec runCodec = (RunCodec) codec; 195 final int k = runCodec.getK(); 196 final int kb; 197 final int kx; 198 if (k <= 256) { 199 kb = 0; 200 kx = k - 1; 201 } else if (k <= 4096) { 202 kb = 1; 203 kx = k / 16 - 1; 204 } else if (k <= 65536) { 205 kb = 2; 206 kx = k / 256 - 1; 207 } else { 208 kb = 3; 209 kx = k / 4096 - 1; 210 } 211 final Codec aCodec = runCodec.getACodec(); 212 final Codec bCodec = runCodec.getBCodec(); 213 int abDef = 0; 214 if (aCodec.equals(defaultForBand)) { 215 abDef = 1; 216 } else if (bCodec.equals(defaultForBand)) { 217 abDef = 2; 218 } 219 final int first = 117 + kb + (kx == 3 ? 0 : 4) + 8 * abDef; 220 final int[] aSpecifier = abDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(aCodec, defaultForBand); 221 final int[] bSpecifier = abDef == 2 ? EMPTY_INT_ARRAY : getSpecifier(bCodec, defaultForBand); 222 final int[] specifier = new int[1 + (kx == 3 ? 0 : 1) + aSpecifier.length + bSpecifier.length]; 223 specifier[0] = first; 224 int index = 1; 225 if (kx != 3) { 226 specifier[1] = kx; 227 index++; 228 } 229 for (final int element : aSpecifier) { 230 specifier[index] = element; 231 index++; 232 } 233 for (final int element : bSpecifier) { 234 specifier[index] = element; 235 index++; 236 } 237 return specifier; 238 } 239 if (codec instanceof PopulationCodec) { 240 final PopulationCodec populationCodec = (PopulationCodec) codec; 241 final Codec tokenCodec = populationCodec.getTokenCodec(); 242 final Codec favouredCodec = populationCodec.getFavouredCodec(); 243 final Codec unfavouredCodec = populationCodec.getUnfavouredCodec(); 244 final int fDef = favouredCodec.equals(defaultForBand) ? 1 : 0; 245 final int uDef = unfavouredCodec.equals(defaultForBand) ? 1 : 0; 246 int tDefL = 0; 247 final int[] favoured = populationCodec.getFavoured(); 248 if (favoured != null) { 249 if (tokenCodec == Codec.BYTE1) { 250 tDefL = 1; 251 } else if (tokenCodec instanceof BHSDCodec) { 252 final BHSDCodec tokenBHSD = (BHSDCodec) tokenCodec; 253 if (tokenBHSD.getS() == 0) { 254 final int[] possibleLValues = { 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; 255 final int l = 256 - tokenBHSD.getH(); 256 int index = Arrays.binarySearch(possibleLValues, l); 257 if (index != -1) { 258 // TODO: check range is ok for ks 259 tDefL = index++; 260 } 261 } 262 } 263 } 264 final int first = 141 + fDef + 2 * uDef + 4 * tDefL; 265 final int[] favouredSpecifier = fDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(favouredCodec, defaultForBand); 266 final int[] tokenSpecifier = tDefL != 0 ? EMPTY_INT_ARRAY : getSpecifier(tokenCodec, defaultForBand); 267 final int[] unfavouredSpecifier = uDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(unfavouredCodec, defaultForBand); 268 final int[] specifier = new int[1 + favouredSpecifier.length + unfavouredSpecifier.length + tokenSpecifier.length]; 269 specifier[0] = first; 270 int index = 1; 271 for (final int element : favouredSpecifier) { 272 specifier[index] = element; 273 index++; 274 } 275 for (final int element : tokenSpecifier) { 276 specifier[index] = element; 277 index++; 278 } 279 for (final int element : unfavouredSpecifier) { 280 specifier[index] = element; 281 index++; 282 } 283 return specifier; 284 } 285 286 return null; 287 } 288 289 public static int getSpecifierForDefaultCodec(final BHSDCodec defaultCodec) { 290 return getSpecifier(defaultCodec, null)[0]; 291 } 292}