View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  package org.apache.commons.compress.harmony.pack200;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  
22  import org.apache.commons.io.input.BoundedInputStream;
23  
24  /**
25   * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
26   * <p>
27   * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
28   * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
29   * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
30   * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
31   * </p>
32   */
33  public abstract class Codec {
34  
35      /**
36       * BCI5 = (5,4): Used for storing branching information in bytecode.
37       */
38      public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
39  
40      /**
41       * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
42       */
43      public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
44  
45      /**
46       * BYTE1 = (1,256): Used for storing plain bytes.
47       */
48      public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
49  
50      /**
51       * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
52       * stored in a single byte.
53       */
54      public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
55  
56      /**
57       * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
58       */
59      public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
60  
61      /**
62       * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
63       * expected to be non-negative.
64       */
65      public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
66  
67      /**
68       * SIGNED5 = (5,64,1): Used for small signed values.
69       */
70      public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
71  
72      /**
73       * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
74       */
75      public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
76  
77      /**
78       * UNSIGNED5 = (5,64): Used for small unsigned values.
79       */
80      public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
81  
82      public int lastBandLength;
83  
84      int check(final int n, final InputStream in) throws Pack200Exception {
85          if (in instanceof BoundedInputStream) {
86              final BoundedInputStream bin = (BoundedInputStream) in;
87              final long count = bin.getCount();
88              final long maxLength = bin.getMaxLength();
89              if (maxLength > -1) {
90                  final long remaining = maxLength - count;
91                  final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
92                  if (count < -1) {
93                      throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
94                  }
95                  if (n > remaining) {
96                      throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
97                  }
98              }
99          }
100         return n;
101     }
102 
103     /**
104      * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
105      * encodings.
106      *
107      * @param in the input stream to read from
108      * @return the value as a long
109      * @throws IOException      if there is a problem reading from the underlying input stream
110      * @throws Pack200Exception if the encoding is a delta encoding
111      */
112     public abstract int decode(InputStream in) throws IOException, Pack200Exception;
113 
114     /**
115      * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
116      * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
117      * to be passed in by default using code similar to:
118      *
119      * <pre>
120      * long last = 0;
121      * while (condition) {
122      *     last = codec.decode(in, last);
123      *     // do something with last
124      * }
125      * </pre>
126      *
127      * @param in   the input stream to read from
128      * @param last the previous value read, which must be supplied if the codec is a delta encoding
129      * @return the value as a long
130      * @throws IOException      if there is a problem reading from the underlying input stream
131      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
132      */
133     public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
134 
135     /**
136      * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
137      * only work when the number of values to be read is known.
138      *
139      * @param n  the number of values to decode
140      * @param in the input stream to read from
141      * @return an array of {@code int} values corresponding to values decoded
142      * @throws IOException      if there is a problem reading from the underlying input stream
143      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
144      */
145     public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
146         lastBandLength = 0;
147         final int[] result = new int[check(n, in)];
148         int last = 0;
149         for (int i = 0; i < n; i++) {
150             result[i] = last = decode(in, last);
151         }
152         return result;
153     }
154 
155     /**
156      * Decodes a sequence of {@code n} values from {@code in}.
157      *
158      * @param n          the number of values to decode
159      * @param in         the input stream to read from
160      * @param firstValue the first value in the band if it has already been read
161      * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
162      * @throws IOException      if there is a problem reading from the underlying input stream
163      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
164      */
165     public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
166         final int[] result = new int[check(n, in) + 1];
167         result[0] = firstValue;
168         int last = firstValue;
169         for (int i = 1; i < n + 1; i++) {
170             result[i] = last = decode(in, last);
171         }
172         return result;
173     }
174 
175     /**
176      * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
177      *
178      * @param value the value to encode
179      * @return the encoded bytes
180      * @throws Pack200Exception TODO
181      */
182     public abstract byte[] encode(int value) throws Pack200Exception;
183 
184     /**
185      * Encodes a single value into a sequence of bytes.
186      *
187      * @param value the value to encode
188      * @param last  the previous value encoded (for delta encodings)
189      * @return the encoded bytes
190      * @throws Pack200Exception TODO
191      */
192     public abstract byte[] encode(int value, int last) throws Pack200Exception;
193 
194     /**
195      * Encodes a sequence of integers into a byte array
196      *
197      * @param ints the values to encode
198      * @return byte[] encoded bytes
199      * @throws Pack200Exception if there is a problem encoding any of the values
200      */
201     public byte[] encode(final int[] ints) throws Pack200Exception {
202         int total = 0;
203         final byte[][] bytes = new byte[ints.length][];
204         for (int i = 0; i < ints.length; i++) {
205             bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
206             total += bytes[i].length;
207         }
208         final byte[] encoded = new byte[total];
209         int index = 0;
210         for (final byte[] element : bytes) {
211             System.arraycopy(element, 0, encoded, index, element.length);
212             index += element.length;
213         }
214         return encoded;
215     }
216 }