1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.commons.compress.harmony.pack200;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23
24 import org.apache.commons.io.input.BoundedInputStream;
25
26 /**
27 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
28 * <p>
29 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
30 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
31 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
32 * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
33 * </p>
34 */
35 public abstract class Codec {
36
37 /**
38 * BCI5 = (5,4): Used for storing branching information in bytecode.
39 */
40 public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
41
42 /**
43 * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
44 */
45 public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
46
47 /**
48 * BYTE1 = (1,256): Used for storing plain bytes.
49 */
50 public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
51
52 /**
53 * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters < 127 are
54 * stored in a single byte.
55 */
56 public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
57
58 /**
59 * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
60 */
61 public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
62
63 /**
64 * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
65 * expected to be non-negative.
66 */
67 public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
68
69 /**
70 * SIGNED5 = (5,64,1): Used for small signed values.
71 */
72 public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
73
74 /**
75 * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
76 */
77 public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
78
79 /**
80 * UNSIGNED5 = (5,64): Used for small unsigned values.
81 */
82 public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
83
84 public int lastBandLength;
85
86 int check(final int n, final InputStream in) throws Pack200Exception {
87 if (in instanceof BoundedInputStream) {
88 final BoundedInputStream bin = (BoundedInputStream) in;
89 final long count = bin.getCount();
90 final long maxLength = bin.getMaxCount();
91 if (maxLength > -1) {
92 final long remaining = maxLength - count;
93 final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
94 if (count < -1 || n > remaining) {
95 throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
96 }
97 }
98 }
99 return n;
100 }
101
102 /**
103 * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
104 * encodings.
105 *
106 * @param in the input stream to read from
107 * @return the value as a long
108 * @throws IOException if there is a problem reading from the underlying input stream
109 * @throws Pack200Exception if the encoding is a delta encoding
110 */
111 public abstract int decode(InputStream in) throws IOException, Pack200Exception;
112
113 /**
114 * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
115 * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
116 * to be passed in by default using code similar to:
117 *
118 * <pre>
119 * long last = 0;
120 * while (condition) {
121 * last = codec.decode(in, last);
122 * // do something with last
123 * }
124 * </pre>
125 *
126 * @param in the input stream to read from
127 * @param last the previous value read, which must be supplied if the codec is a delta encoding
128 * @return the value as a long
129 * @throws IOException if there is a problem reading from the underlying input stream
130 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
131 */
132 public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
133
134 /**
135 * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
136 * only work when the number of values to be read is known.
137 *
138 * @param n the number of values to decode
139 * @param in the input stream to read from
140 * @return an array of {@code int} values corresponding to values decoded
141 * @throws IOException if there is a problem reading from the underlying input stream
142 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
143 */
144 public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
145 lastBandLength = 0;
146 final int[] result = new int[check(n, in)];
147 int last = 0;
148 for (int i = 0; i < n; i++) {
149 result[i] = last = decode(in, last);
150 }
151 return result;
152 }
153
154 /**
155 * Decodes a sequence of {@code n} values from {@code in}.
156 *
157 * @param n the number of values to decode
158 * @param in the input stream to read from
159 * @param firstValue the first value in the band if it has already been read
160 * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
161 * @throws IOException if there is a problem reading from the underlying input stream
162 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
163 */
164 public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
165 final int[] result = new int[check(n, in) + 1];
166 result[0] = firstValue;
167 int last = firstValue;
168 for (int i = 1; i < n + 1; i++) {
169 result[i] = last = decode(in, last);
170 }
171 return result;
172 }
173
174 /**
175 * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
176 *
177 * @param value the value to encode.
178 * @return the encoded bytes.
179 * @throws Pack200Exception If a Pack200 semantic error occurs.
180 */
181 public abstract byte[] encode(int value) throws Pack200Exception;
182
183 /**
184 * Encodes a single value into a sequence of bytes.
185 *
186 * @param value the value to encode.
187 * @param last the previous value encoded (for delta encodings).
188 * @return the encoded bytes.
189 * @throws Pack200Exception If a Pack200 semantic error occurs.
190 */
191 public abstract byte[] encode(int value, int last) throws Pack200Exception;
192
193 /**
194 * Encodes a sequence of integers into a byte array.
195 *
196 * @param ints the values to encode.
197 * @return byte[] encoded bytes.
198 * @throws Pack200Exception if there is a problem encoding any of the values.
199 */
200 public byte[] encode(final int[] ints) throws Pack200Exception {
201 int total = 0;
202 final byte[][] bytes = new byte[ints.length][];
203 for (int i = 0; i < ints.length; i++) {
204 bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
205 total += bytes[i].length;
206 }
207 final byte[] encoded = new byte[total];
208 int index = 0;
209 for (final byte[] element : bytes) {
210 System.arraycopy(element, 0, encoded, index, element.length);
211 index += element.length;
212 }
213 return encoded;
214 }
215 }