001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.harmony.pack200;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.io.InputStream;
024import java.util.Arrays;
025import java.util.HashMap;
026import java.util.Map;
027
028/**
029 * CodecEncoding is used to get the right Codec for a given meta-encoding.
030 */
031public class CodecEncoding {
032
033    private static final int[] EMPTY_INT_ARRAY = {};
034
035    /**
036     * The canonical encodings are defined to allow a single byte to represent one of the standard encodings. The following values are defined in the Pack200
037     * specification, and this array cannot be changed.
038     */
039    private static final BHSDCodec[] canonicalCodec = { null, new BHSDCodec(1, 256), new BHSDCodec(1, 256, 1), new BHSDCodec(1, 256, 0, 1),
040            new BHSDCodec(1, 256, 1, 1), new BHSDCodec(2, 256), new BHSDCodec(2, 256, 1), new BHSDCodec(2, 256, 0, 1), new BHSDCodec(2, 256, 1, 1),
041            new BHSDCodec(3, 256), new BHSDCodec(3, 256, 1), new BHSDCodec(3, 256, 0, 1), new BHSDCodec(3, 256, 1, 1), new BHSDCodec(4, 256),
042            new BHSDCodec(4, 256, 1), new BHSDCodec(4, 256, 0, 1), new BHSDCodec(4, 256, 1, 1), new BHSDCodec(5, 4), new BHSDCodec(5, 4, 1),
043            new BHSDCodec(5, 4, 2), new BHSDCodec(5, 16), new BHSDCodec(5, 16, 1), new BHSDCodec(5, 16, 2), new BHSDCodec(5, 32), new BHSDCodec(5, 32, 1),
044            new BHSDCodec(5, 32, 2), new BHSDCodec(5, 64), new BHSDCodec(5, 64, 1), new BHSDCodec(5, 64, 2), new BHSDCodec(5, 128), new BHSDCodec(5, 128, 1),
045            new BHSDCodec(5, 128, 2), new BHSDCodec(5, 4, 0, 1), new BHSDCodec(5, 4, 1, 1), new BHSDCodec(5, 4, 2, 1), new BHSDCodec(5, 16, 0, 1),
046            new BHSDCodec(5, 16, 1, 1), new BHSDCodec(5, 16, 2, 1), new BHSDCodec(5, 32, 0, 1), new BHSDCodec(5, 32, 1, 1), new BHSDCodec(5, 32, 2, 1),
047            new BHSDCodec(5, 64, 0, 1), new BHSDCodec(5, 64, 1, 1), new BHSDCodec(5, 64, 2, 1), new BHSDCodec(5, 128, 0, 1), new BHSDCodec(5, 128, 1, 1),
048            new BHSDCodec(5, 128, 2, 1), new BHSDCodec(2, 192), new BHSDCodec(2, 224), new BHSDCodec(2, 240), new BHSDCodec(2, 248), new BHSDCodec(2, 252),
049            new BHSDCodec(2, 8, 0, 1), new BHSDCodec(2, 8, 1, 1), new BHSDCodec(2, 16, 0, 1), new BHSDCodec(2, 16, 1, 1), new BHSDCodec(2, 32, 0, 1),
050            new BHSDCodec(2, 32, 1, 1), new BHSDCodec(2, 64, 0, 1), new BHSDCodec(2, 64, 1, 1), new BHSDCodec(2, 128, 0, 1), new BHSDCodec(2, 128, 1, 1),
051            new BHSDCodec(2, 192, 0, 1), new BHSDCodec(2, 192, 1, 1), new BHSDCodec(2, 224, 0, 1), new BHSDCodec(2, 224, 1, 1), new BHSDCodec(2, 240, 0, 1),
052            new BHSDCodec(2, 240, 1, 1), new BHSDCodec(2, 248, 0, 1), new BHSDCodec(2, 248, 1, 1), new BHSDCodec(3, 192), new BHSDCodec(3, 224),
053            new BHSDCodec(3, 240), new BHSDCodec(3, 248), new BHSDCodec(3, 252), new BHSDCodec(3, 8, 0, 1), new BHSDCodec(3, 8, 1, 1),
054            new BHSDCodec(3, 16, 0, 1), new BHSDCodec(3, 16, 1, 1), new BHSDCodec(3, 32, 0, 1), new BHSDCodec(3, 32, 1, 1), new BHSDCodec(3, 64, 0, 1),
055            new BHSDCodec(3, 64, 1, 1), new BHSDCodec(3, 128, 0, 1), new BHSDCodec(3, 128, 1, 1), new BHSDCodec(3, 192, 0, 1), new BHSDCodec(3, 192, 1, 1),
056            new BHSDCodec(3, 224, 0, 1), new BHSDCodec(3, 224, 1, 1), new BHSDCodec(3, 240, 0, 1), new BHSDCodec(3, 240, 1, 1), new BHSDCodec(3, 248, 0, 1),
057            new BHSDCodec(3, 248, 1, 1), new BHSDCodec(4, 192), new BHSDCodec(4, 224), new BHSDCodec(4, 240), new BHSDCodec(4, 248), new BHSDCodec(4, 252),
058            new BHSDCodec(4, 8, 0, 1), new BHSDCodec(4, 8, 1, 1), new BHSDCodec(4, 16, 0, 1), new BHSDCodec(4, 16, 1, 1), new BHSDCodec(4, 32, 0, 1),
059            new BHSDCodec(4, 32, 1, 1), new BHSDCodec(4, 64, 0, 1), new BHSDCodec(4, 64, 1, 1), new BHSDCodec(4, 128, 0, 1), new BHSDCodec(4, 128, 1, 1),
060            new BHSDCodec(4, 192, 0, 1), new BHSDCodec(4, 192, 1, 1), new BHSDCodec(4, 224, 0, 1), new BHSDCodec(4, 224, 1, 1), new BHSDCodec(4, 240, 0, 1),
061            new BHSDCodec(4, 240, 1, 1), new BHSDCodec(4, 248, 0, 1), new BHSDCodec(4, 248, 1, 1) };
062
063    private static Map<BHSDCodec, Integer> canonicalCodecsToSpecifiers;
064
065    static {
066        final HashMap<BHSDCodec, Integer> reverseMap = new HashMap<>(canonicalCodec.length);
067        for (int i = 0; i < canonicalCodec.length; i++) {
068            reverseMap.put(canonicalCodec[i], Integer.valueOf(i));
069        }
070        canonicalCodecsToSpecifiers = reverseMap;
071    }
072
073    public static BHSDCodec getCanonicalCodec(final int i) {
074        return canonicalCodec[i];
075    }
076
077    /**
078     * Gets the codec specified by the given value byte and optional byte header. If the value is &gt;= 116, then bytes may be consumed from the secondary
079     * input stream, which is taken to be the contents of the band_headers byte array. Since the values from this are consumed and not repeated, the input
080     * stream should be reused for subsequent encodings. This does not therefore close the input stream.
081     *
082     * @param value        the canonical encoding value
083     * @param in           the input stream to read additional byte headers from
084     * @param defaultCodec TODO
085     * @return the corresponding codec, or {@code null} if the default should be used
086     * @throws IOException      if there is a problem reading from the input stream (which in reality, is never, since the band_headers are likely stored in a
087     *                          byte array and accessed via a ByteArrayInputStream. However, an EOFException could occur if things go wrong)
088     * @throws Pack200Exception TODO
089     */
090    public static Codec getCodec(final int value, final InputStream in, final Codec defaultCodec) throws IOException, Pack200Exception {
091        // Sanity check to make sure that no-one has changed
092        // the canonical codecs, which would really cause havoc
093        if (canonicalCodec.length != 116) {
094            throw new Error("Canonical encodings have been incorrectly modified");
095        }
096        if (value < 0) {
097            throw new IllegalArgumentException("Encoding cannot be less than zero");
098        }
099        if (value == 0) {
100            return defaultCodec;
101        }
102        if (value <= 115) {
103            return canonicalCodec[value];
104        }
105        if (value == 116) {
106            int code = in.read();
107            if (code == -1) {
108                throw new EOFException("End of buffer read whilst trying to decode codec");
109            }
110            final int d = code & 0x01;
111            final int s = code >> 1 & 0x03;
112            final int b = (code >> 3 & 0x07) + 1; // this might result in an invalid
113            // number, but it's checked in the
114            // Codec constructor
115            code = in.read();
116            if (code == -1) {
117                throw new EOFException("End of buffer read whilst trying to decode codec");
118            }
119            final int h = code + 1;
120            // This handles the special cases for invalid combinations of data.
121            return new BHSDCodec(b, h, s, d);
122        }
123        if (value >= 117 && value <= 140) { // Run codec
124            final int offset = value - 117;
125            final int kx = offset & 3;
126            final boolean kbflag = (offset >> 2 & 1) == 1;
127            final boolean adef = (offset >> 3 & 1) == 1;
128            final boolean bdef = (offset >> 4 & 1) == 1;
129            // If both A and B use the default encoding, what's the point of
130            // having a run of default values followed by default values
131            if (adef && bdef) {
132                throw new Pack200Exception("ADef and BDef should never both be true");
133            }
134            final int kb = kbflag ? in.read() : 3;
135            final int k = (kb + 1) * (int) Math.pow(16, kx);
136            final Codec aCodec;
137            final Codec bCodec;
138            if (adef) {
139                aCodec = defaultCodec;
140            } else {
141                aCodec = getCodec(in.read(), in, defaultCodec);
142            }
143            if (bdef) {
144                bCodec = defaultCodec;
145            } else {
146                bCodec = getCodec(in.read(), in, defaultCodec);
147            }
148            return new RunCodec(k, aCodec, bCodec);
149        }
150        if (value < 141 || value > 188) {
151            throw new Pack200Exception("Invalid codec encoding byte (" + value + ") found");
152        }
153        final int offset = value - 141;
154        final boolean fdef = (offset & 1) == 1;
155        final boolean udef = (offset >> 1 & 1) == 1;
156        final int tdefl = offset >> 2;
157        final boolean tdef = tdefl != 0;
158        // From section 6.7.3 of spec
159        final int[] tdefToL = { 0, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
160        final int l = tdefToL[tdefl];
161        // NOTE: Do not re-factor this to bring out uCodec; the order in
162        // which
163        // they are read from the stream is important
164        if (tdef) {
165            final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec);
166            final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec);
167            // Unfortunately, if tdef, then tCodec depends both on l and
168            // also on k, the
169            // number of items read from the fCodec. So we don't know in
170            // advance what
171            // the codec will be.
172            return new PopulationCodec(fCodec, l, uCodec);
173        }
174        final Codec fCodec = fdef ? defaultCodec : getCodec(in.read(), in, defaultCodec);
175        final Codec tCodec = getCodec(in.read(), in, defaultCodec);
176        final Codec uCodec = udef ? defaultCodec : getCodec(in.read(), in, defaultCodec);
177        return new PopulationCodec(fCodec, tCodec, uCodec);
178    }
179
180    public static int[] getSpecifier(final Codec codec, final Codec defaultForBand) {
181        if (canonicalCodecsToSpecifiers.containsKey(codec)) {
182            return new int[] { canonicalCodecsToSpecifiers.get(codec).intValue() };
183        }
184        if (codec instanceof BHSDCodec) {
185            // Cache these?
186            final BHSDCodec bhsdCodec = (BHSDCodec) codec;
187            final int[] specifiers = new int[3];
188            specifiers[0] = 116;
189            specifiers[1] = (bhsdCodec.isDelta() ? 1 : 0) + 2 * bhsdCodec.getS() + 8 * (bhsdCodec.getB() - 1);
190            specifiers[2] = bhsdCodec.getH() - 1;
191            return specifiers;
192        }
193        if (codec instanceof RunCodec) {
194            final RunCodec runCodec = (RunCodec) codec;
195            final int k = runCodec.getK();
196            final int kb;
197            final int kx;
198            if (k <= 256) {
199                kb = 0;
200                kx = k - 1;
201            } else if (k <= 4096) {
202                kb = 1;
203                kx = k / 16 - 1;
204            } else if (k <= 65536) {
205                kb = 2;
206                kx = k / 256 - 1;
207            } else {
208                kb = 3;
209                kx = k / 4096 - 1;
210            }
211            final Codec aCodec = runCodec.getACodec();
212            final Codec bCodec = runCodec.getBCodec();
213            int abDef = 0;
214            if (aCodec.equals(defaultForBand)) {
215                abDef = 1;
216            } else if (bCodec.equals(defaultForBand)) {
217                abDef = 2;
218            }
219            final int first = 117 + kb + (kx == 3 ? 0 : 4) + 8 * abDef;
220            final int[] aSpecifier = abDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(aCodec, defaultForBand);
221            final int[] bSpecifier = abDef == 2 ? EMPTY_INT_ARRAY : getSpecifier(bCodec, defaultForBand);
222            final int[] specifier = new int[1 + (kx == 3 ? 0 : 1) + aSpecifier.length + bSpecifier.length];
223            specifier[0] = first;
224            int index = 1;
225            if (kx != 3) {
226                specifier[1] = kx;
227                index++;
228            }
229            for (final int element : aSpecifier) {
230                specifier[index] = element;
231                index++;
232            }
233            for (final int element : bSpecifier) {
234                specifier[index] = element;
235                index++;
236            }
237            return specifier;
238        }
239        if (codec instanceof PopulationCodec) {
240            final PopulationCodec populationCodec = (PopulationCodec) codec;
241            final Codec tokenCodec = populationCodec.getTokenCodec();
242            final Codec favouredCodec = populationCodec.getFavouredCodec();
243            final Codec unfavouredCodec = populationCodec.getUnfavouredCodec();
244            final int fDef = favouredCodec.equals(defaultForBand) ? 1 : 0;
245            final int uDef = unfavouredCodec.equals(defaultForBand) ? 1 : 0;
246            int tDefL = 0;
247            final int[] favoured = populationCodec.getFavoured();
248            if (favoured != null) {
249                if (tokenCodec == Codec.BYTE1) {
250                    tDefL = 1;
251                } else if (tokenCodec instanceof BHSDCodec) {
252                    final BHSDCodec tokenBHSD = (BHSDCodec) tokenCodec;
253                    if (tokenBHSD.getS() == 0) {
254                        final int[] possibleLValues = { 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
255                        final int l = 256 - tokenBHSD.getH();
256                        int index = Arrays.binarySearch(possibleLValues, l);
257                        if (index != -1) {
258                            // TODO: check range is ok for ks
259                            tDefL = index++;
260                        }
261                    }
262                }
263            }
264            final int first = 141 + fDef + 2 * uDef + 4 * tDefL;
265            final int[] favouredSpecifier = fDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(favouredCodec, defaultForBand);
266            final int[] tokenSpecifier = tDefL != 0 ? EMPTY_INT_ARRAY : getSpecifier(tokenCodec, defaultForBand);
267            final int[] unfavouredSpecifier = uDef == 1 ? EMPTY_INT_ARRAY : getSpecifier(unfavouredCodec, defaultForBand);
268            final int[] specifier = new int[1 + favouredSpecifier.length + unfavouredSpecifier.length + tokenSpecifier.length];
269            specifier[0] = first;
270            int index = 1;
271            for (final int element : favouredSpecifier) {
272                specifier[index] = element;
273                index++;
274            }
275            for (final int element : tokenSpecifier) {
276                specifier[index] = element;
277                index++;
278            }
279            for (final int element : unfavouredSpecifier) {
280                specifier[index] = element;
281                index++;
282            }
283            return specifier;
284        }
285
286        return null;
287    }
288
289    public static int getSpecifierForDefaultCodec(final BHSDCodec defaultCodec) {
290        return getSpecifier(defaultCodec, null)[0];
291    }
292}