001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.harmony.pack200;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.io.input.BoundedInputStream;
025
026/**
027 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
028 * <p>
029 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
030 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
031 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
032 * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
033 * </p>
034 */
035public abstract class Codec {
036
037    /**
038     * BCI5 = (5,4): Used for storing branching information in bytecode.
039     */
040    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
041
042    /**
043     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
044     */
045    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
046
047    /**
048     * BYTE1 = (1,256): Used for storing plain bytes.
049     */
050    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
051
052    /**
053     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
054     * stored in a single byte.
055     */
056    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
057
058    /**
059     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
060     */
061    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
062
063    /**
064     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
065     * expected to be non-negative.
066     */
067    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
068
069    /**
070     * SIGNED5 = (5,64,1): Used for small signed values.
071     */
072    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
073
074    /**
075     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
076     */
077    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
078
079    /**
080     * UNSIGNED5 = (5,64): Used for small unsigned values.
081     */
082    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
083
084    public int lastBandLength;
085
086    int check(final int n, final InputStream in) throws Pack200Exception {
087        if (in instanceof BoundedInputStream) {
088            final BoundedInputStream bin = (BoundedInputStream) in;
089            final long count = bin.getCount();
090            final long maxLength = bin.getMaxCount();
091            if (maxLength > -1) {
092                final long remaining = maxLength - count;
093                final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
094                if (count < -1 || n > remaining) {
095                    throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
096                }
097            }
098        }
099        return n;
100    }
101
102    /**
103     * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
104     * encodings.
105     *
106     * @param in the input stream to read from
107     * @return the value as a long
108     * @throws IOException      if there is a problem reading from the underlying input stream
109     * @throws Pack200Exception if the encoding is a delta encoding
110     */
111    public abstract int decode(InputStream in) throws IOException, Pack200Exception;
112
113    /**
114     * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
115     * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
116     * to be passed in by default using code similar to:
117     *
118     * <pre>
119     * long last = 0;
120     * while (condition) {
121     *     last = codec.decode(in, last);
122     *     // do something with last
123     * }
124     * </pre>
125     *
126     * @param in   the input stream to read from
127     * @param last the previous value read, which must be supplied if the codec is a delta encoding
128     * @return the value as a long
129     * @throws IOException      if there is a problem reading from the underlying input stream
130     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
131     */
132    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
133
134    /**
135     * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
136     * only work when the number of values to be read is known.
137     *
138     * @param n  the number of values to decode
139     * @param in the input stream to read from
140     * @return an array of {@code int} values corresponding to values decoded
141     * @throws IOException      if there is a problem reading from the underlying input stream
142     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
143     */
144    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
145        lastBandLength = 0;
146        final int[] result = new int[check(n, in)];
147        int last = 0;
148        for (int i = 0; i < n; i++) {
149            result[i] = last = decode(in, last);
150        }
151        return result;
152    }
153
154    /**
155     * Decodes a sequence of {@code n} values from {@code in}.
156     *
157     * @param n          the number of values to decode
158     * @param in         the input stream to read from
159     * @param firstValue the first value in the band if it has already been read
160     * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
161     * @throws IOException      if there is a problem reading from the underlying input stream
162     * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
163     */
164    public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
165        final int[] result = new int[check(n, in) + 1];
166        result[0] = firstValue;
167        int last = firstValue;
168        for (int i = 1; i < n + 1; i++) {
169            result[i] = last = decode(in, last);
170        }
171        return result;
172    }
173
174    /**
175     * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
176     *
177     * @param value the value to encode.
178     * @return the encoded bytes.
179     * @throws Pack200Exception If a Pack200 semantic error occurs.
180     */
181    public abstract byte[] encode(int value) throws Pack200Exception;
182
183    /**
184     * Encodes a single value into a sequence of bytes.
185     *
186     * @param value the value to encode.
187     * @param last  the previous value encoded (for delta encodings).
188     * @return the encoded bytes.
189     * @throws Pack200Exception If a Pack200 semantic error occurs.
190     */
191    public abstract byte[] encode(int value, int last) throws Pack200Exception;
192
193    /**
194     * Encodes a sequence of integers into a byte array.
195     *
196     * @param ints the values to encode.
197     * @return byte[] encoded bytes.
198     * @throws Pack200Exception if there is a problem encoding any of the values.
199     */
200    public byte[] encode(final int[] ints) throws Pack200Exception {
201        int total = 0;
202        final byte[][] bytes = new byte[ints.length][];
203        for (int i = 0; i < ints.length; i++) {
204            bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
205            total += bytes[i].length;
206        }
207        final byte[] encoded = new byte[total];
208        int index = 0;
209        for (final byte[] element : bytes) {
210            System.arraycopy(element, 0, encoded, index, element.length);
211            index += element.length;
212        }
213        return encoded;
214    }
215}