View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   https://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.compress.harmony.pack200;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  
24  import org.apache.commons.io.input.BoundedInputStream;
25  
26  /**
27   * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
28   * <p>
29   * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
30   * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
31   * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
32   * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
33   * </p>
34   */
35  public abstract class Codec {
36  
37      /**
38       * BCI5 = (5,4): Used for storing branching information in bytecode.
39       */
40      public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);
41  
42      /**
43       * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
44       */
45      public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);
46  
47      /**
48       * BYTE1 = (1,256): Used for storing plain bytes.
49       */
50      public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);
51  
52      /**
53       * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
54       * stored in a single byte.
55       */
56      public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);
57  
58      /**
59       * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
60       */
61      public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);
62  
63      /**
64       * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
65       * expected to be non-negative.
66       */
67      public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);
68  
69      /**
70       * SIGNED5 = (5,64,1): Used for small signed values.
71       */
72      public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);
73  
74      /**
75       * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
76       */
77      public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);
78  
79      /**
80       * UNSIGNED5 = (5,64): Used for small unsigned values.
81       */
82      public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);
83  
84      public int lastBandLength;
85  
86      int check(final int n, final InputStream in) throws Pack200Exception {
87          if (in instanceof BoundedInputStream) {
88              final BoundedInputStream bin = (BoundedInputStream) in;
89              final long count = bin.getCount();
90              final long maxLength = bin.getMaxCount();
91              if (maxLength > -1) {
92                  final long remaining = maxLength - count;
93                  final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
94                  if (count < -1 || n > remaining) {
95                      throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
96                  }
97              }
98          }
99          return n;
100     }
101 
102     /**
103      * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
104      * encodings.
105      *
106      * @param in the input stream to read from
107      * @return the value as a long
108      * @throws IOException      if there is a problem reading from the underlying input stream
109      * @throws Pack200Exception if the encoding is a delta encoding
110      */
111     public abstract int decode(InputStream in) throws IOException, Pack200Exception;
112 
113     /**
114      * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
115      * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
116      * to be passed in by default using code similar to:
117      *
118      * <pre>
119      * long last = 0;
120      * while (condition) {
121      *     last = codec.decode(in, last);
122      *     // do something with last
123      * }
124      * </pre>
125      *
126      * @param in   the input stream to read from
127      * @param last the previous value read, which must be supplied if the codec is a delta encoding
128      * @return the value as a long
129      * @throws IOException      if there is a problem reading from the underlying input stream
130      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
131      */
132     public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;
133 
134     /**
135      * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
136      * only work when the number of values to be read is known.
137      *
138      * @param n  the number of values to decode
139      * @param in the input stream to read from
140      * @return an array of {@code int} values corresponding to values decoded
141      * @throws IOException      if there is a problem reading from the underlying input stream
142      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
143      */
144     public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
145         lastBandLength = 0;
146         final int[] result = new int[check(n, in)];
147         int last = 0;
148         for (int i = 0; i < n; i++) {
149             result[i] = last = decode(in, last);
150         }
151         return result;
152     }
153 
154     /**
155      * Decodes a sequence of {@code n} values from {@code in}.
156      *
157      * @param n          the number of values to decode
158      * @param in         the input stream to read from
159      * @param firstValue the first value in the band if it has already been read
160      * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
161      * @throws IOException      if there is a problem reading from the underlying input stream
162      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
163      */
164     public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
165         final int[] result = new int[check(n, in) + 1];
166         result[0] = firstValue;
167         int last = firstValue;
168         for (int i = 1; i < n + 1; i++) {
169             result[i] = last = decode(in, last);
170         }
171         return result;
172     }
173 
174     /**
175      * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
176      *
177      * @param value the value to encode.
178      * @return the encoded bytes.
179      * @throws Pack200Exception If a Pack200 semantic error occurs.
180      */
181     public abstract byte[] encode(int value) throws Pack200Exception;
182 
183     /**
184      * Encodes a single value into a sequence of bytes.
185      *
186      * @param value the value to encode.
187      * @param last  the previous value encoded (for delta encodings).
188      * @return the encoded bytes.
189      * @throws Pack200Exception If a Pack200 semantic error occurs.
190      */
191     public abstract byte[] encode(int value, int last) throws Pack200Exception;
192 
193     /**
194      * Encodes a sequence of integers into a byte array.
195      *
196      * @param ints the values to encode.
197      * @return byte[] encoded bytes.
198      * @throws Pack200Exception if there is a problem encoding any of the values.
199      */
200     public byte[] encode(final int[] ints) throws Pack200Exception {
201         int total = 0;
202         final byte[][] bytes = new byte[ints.length][];
203         for (int i = 0; i < ints.length; i++) {
204             bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
205             total += bytes[i].length;
206         }
207         final byte[] encoded = new byte[total];
208         int index = 0;
209         for (final byte[] element : bytes) {
210             System.arraycopy(element, 0, encoded, index, element.length);
211             index += element.length;
212         }
213         return encoded;
214     }
215 }