001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.commons.compress.harmony.pack200;
018
019import java.io.IOException;
020import java.io.InputStream;
021
022import org.apache.commons.io.input.BoundedInputStream;
023
024/**
025 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
026 * <p>
027 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
028 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
029 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
030 * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
031 * </p>
032 */
033public abstract class Codec {
034
035    /**
036     * BCI5 = (5,4): Used for storing branching information in bytecode.
037     */
038    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
039
040    /**
041     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
042     */
043    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
044
045    /**
046     * BYTE1 = (1,256): Used for storing plain bytes.
047     */
048    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
049
050    /**
051     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
052     * stored in a single byte.
053     */
054    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
055
056    /**
057     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
058     */
059    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
060
061    /**
062     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
063     * expected to be non-negative.
064     */
065    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
066
067    /**
068     * SIGNED5 = (5,64,1): Used for small signed values.
069     */
070    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
071
072    /**
073     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
074     */
075    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
076
077    /**
078     * UNSIGNED5 = (5,64): Used for small unsigned values.
079     */
080    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
081
082    public int lastBandLength;
083
084    int check(final int n, final InputStream in) throws Pack200Exception {
085        if (in instanceof BoundedInputStream) {
086            final BoundedInputStream bin = (BoundedInputStream) in;
087            final long count = bin.getCount();
088            final long maxLength = bin.getMaxLength();
089            if (maxLength > -1) {
090                final long remaining = maxLength - count;
091                final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
092                if (count < -1) {
093                    throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
094                }
095                if (n > remaining) {
096                    throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
097                }
098            }
099        }
100        return n;
101    }
102
103    /**
104     * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
105     * encodings.
106     *
107     * @param in the input stream to read from
108     * @return the value as a long
109     * @throws IOException      if there is a problem reading from the underlying input stream
110     * @throws Pack200Exception if the encoding is a delta encoding
111     */
112    public abstract int decode(InputStream in) throws IOException, Pack200Exception;
113
114    /**
115     * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
116     * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
117     * to be passed in by default using code similar to:
118     *
119     * <pre>
120     * long last = 0;
121     * while (condition) {
122     *     last = codec.decode(in, last);
123     *     // do something with last
124     * }
125     * </pre>
126     *
127     * @param in   the input stream to read from
128     * @param last the previous value read, which must be supplied if the codec is a delta encoding
129     * @return the value as a long
130     * @throws IOException      if there is a problem reading from the underlying input stream
131     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
132     */
133    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
134
135    /**
136     * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
137     * only work when the number of values to be read is known.
138     *
139     * @param n  the number of values to decode
140     * @param in the input stream to read from
141     * @return an array of {@code int} values corresponding to values decoded
142     * @throws IOException      if there is a problem reading from the underlying input stream
143     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
144     */
145    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
146        lastBandLength = 0;
147        final int[] result = new int[check(n, in)];
148        int last = 0;
149        for (int i = 0; i < n; i++) {
150            result[i] = last = decode(in, last);
151        }
152        return result;
153    }
154
155    /**
156     * Decodes a sequence of {@code n} values from {@code in}.
157     *
158     * @param n          the number of values to decode
159     * @param in         the input stream to read from
160     * @param firstValue the first value in the band if it has already been read
161     * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
162     * @throws IOException      if there is a problem reading from the underlying input stream
163     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
164     */
165    public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
166        final int[] result = new int[check(n, in) + 1];
167        result[0] = firstValue;
168        int last = firstValue;
169        for (int i = 1; i < n + 1; i++) {
170            result[i] = last = decode(in, last);
171        }
172        return result;
173    }
174
175    /**
176     * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
177     *
178     * @param value the value to encode
179     * @return the encoded bytes
180     * @throws Pack200Exception TODO
181     */
182    public abstract byte[] encode(int value) throws Pack200Exception;
183
184    /**
185     * Encodes a single value into a sequence of bytes.
186     *
187     * @param value the value to encode
188     * @param last  the previous value encoded (for delta encodings)
189     * @return the encoded bytes
190     * @throws Pack200Exception TODO
191     */
192    public abstract byte[] encode(int value, int last) throws Pack200Exception;
193
194    /**
195     * Encodes a sequence of integers into a byte array
196     *
197     * @param ints the values to encode
198     * @return byte[] encoded bytes
199     * @throws Pack200Exception if there is a problem encoding any of the values
200     */
201    public byte[] encode(final int[] ints) throws Pack200Exception {
202        int total = 0;
203        final byte[][] bytes = new byte[ints.length][];
204        for (int i = 0; i < ints.length; i++) {
205            bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
206            total += bytes[i].length;
207        }
208        final byte[] encoded = new byte[total];
209        int index = 0;
210        for (final byte[] element : bytes) {
211            System.arraycopy(element, 0, encoded, index, element.length);
212            index += element.length;
213        }
214        return encoded;
215    }
216}