Codec.java

  1. /*
  2.  *  Licensed to the Apache Software Foundation (ASF) under one or more
  3.  *  contributor license agreements.  See the NOTICE file distributed with
  4.  *  this work for additional information regarding copyright ownership.
  5.  *  The ASF licenses this file to You under the Apache License, Version 2.0
  6.  *  (the "License"); you may not use this file except in compliance with
  7.  *  the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  *  Unless required by applicable law or agreed to in writing, software
  12.  *  distributed under the License is distributed on an "AS IS" BASIS,
  13.  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  *  See the License for the specific language governing permissions and
  15.  *  limitations under the License.
  16.  */
  17. package org.apache.commons.compress.harmony.pack200;

  18. import java.io.IOException;
  19. import java.io.InputStream;

  20. import org.apache.commons.io.input.BoundedInputStream;

  21. /**
  22.  * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa).
  23.  * <p>
  24.  * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many
  25.  * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and
  26.  * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical
  27.  * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)})
  28.  * </p>
  29.  */
  30. public abstract class Codec {

  31.     /**
  32.      * BCI5 = (5,4): Used for storing branching information in bytecode.
  33.      */
  34.     public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);

  35.     /**
  36.      * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
  37.      */
  38.     public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);

  39.     /**
  40.      * BYTE1 = (1,256): Used for storing plain bytes.
  41.      */
  42.     public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);

  43.     /**
  44.      * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters &lt; 127 are
  45.      * stored in a single byte.
  46.      */
  47.     public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);

  48.     /**
  49.      * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values.
  50.      */
  51.     public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);

  52.     /**
  53.      * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are
  54.      * expected to be non-negative.
  55.      */
  56.     public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);

  57.     /**
  58.      * SIGNED5 = (5,64,1): Used for small signed values.
  59.      */
  60.     public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);

  61.     /**
  62.      * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values.
  63.      */
  64.     public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);

  65.     /**
  66.      * UNSIGNED5 = (5,64): Used for small unsigned values.
  67.      */
  68.     public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);

  69.     public int lastBandLength;

  70.     int check(final int n, final InputStream in) throws Pack200Exception {
  71.         if (in instanceof BoundedInputStream) {
  72.             final BoundedInputStream bin = (BoundedInputStream) in;
  73.             final long count = bin.getCount();
  74.             final long maxLength = bin.getMaxCount();
  75.             if (maxLength > -1) {
  76.                 final long remaining = maxLength - count;
  77.                 final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)";
  78.                 if (count < -1) {
  79.                     throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
  80.                 }
  81.                 if (n > remaining) {
  82.                     throw new Pack200Exception(String.format(format, n, count, maxLength, remaining));
  83.                 }
  84.             }
  85.         }
  86.         return n;
  87.     }

  88.     /**
  89.      * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta
  90.      * encodings.
  91.      *
  92.      * @param in the input stream to read from
  93.      * @return the value as a long
  94.      * @throws IOException      if there is a problem reading from the underlying input stream
  95.      * @throws Pack200Exception if the encoding is a delta encoding
  96.      */
  97.     public abstract int decode(InputStream in) throws IOException, Pack200Exception;

  98.     /**
  99.      * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous
  100.      * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value
  101.      * to be passed in by default using code similar to:
  102.      *
  103.      * <pre>
  104.      * long last = 0;
  105.      * while (condition) {
  106.      *     last = codec.decode(in, last);
  107.      *     // do something with last
  108.      * }
  109.      * </pre>
  110.      *
  111.      * @param in   the input stream to read from
  112.      * @param last the previous value read, which must be supplied if the codec is a delta encoding
  113.      * @return the value as a long
  114.      * @throws IOException      if there is a problem reading from the underlying input stream
  115.      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
  116.      */
  117.     public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;

  118.     /**
  119.      * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec})
  120.      * only work when the number of values to be read is known.
  121.      *
  122.      * @param n  the number of values to decode
  123.      * @param in the input stream to read from
  124.      * @return an array of {@code int} values corresponding to values decoded
  125.      * @throws IOException      if there is a problem reading from the underlying input stream
  126.      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
  127.      */
  128.     public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
  129.         lastBandLength = 0;
  130.         final int[] result = new int[check(n, in)];
  131.         int last = 0;
  132.         for (int i = 0; i < n; i++) {
  133.             result[i] = last = decode(in, last);
  134.         }
  135.         return result;
  136.     }

  137.     /**
  138.      * Decodes a sequence of {@code n} values from {@code in}.
  139.      *
  140.      * @param n          the number of values to decode
  141.      * @param in         the input stream to read from
  142.      * @param firstValue the first value in the band if it has already been read
  143.      * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array.
  144.      * @throws IOException      if there is a problem reading from the underlying input stream
  145.      * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid
  146.      */
  147.     public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception {
  148.         final int[] result = new int[check(n, in) + 1];
  149.         result[0] = firstValue;
  150.         int last = firstValue;
  151.         for (int i = 1; i < n + 1; i++) {
  152.             result[i] = last = decode(in, last);
  153.         }
  154.         return result;
  155.     }

  156.     /**
  157.      * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings.
  158.      *
  159.      * @param value the value to encode
  160.      * @return the encoded bytes
  161.      * @throws Pack200Exception TODO
  162.      */
  163.     public abstract byte[] encode(int value) throws Pack200Exception;

  164.     /**
  165.      * Encodes a single value into a sequence of bytes.
  166.      *
  167.      * @param value the value to encode
  168.      * @param last  the previous value encoded (for delta encodings)
  169.      * @return the encoded bytes
  170.      * @throws Pack200Exception TODO
  171.      */
  172.     public abstract byte[] encode(int value, int last) throws Pack200Exception;

  173.     /**
  174.      * Encodes a sequence of integers into a byte array
  175.      *
  176.      * @param ints the values to encode
  177.      * @return byte[] encoded bytes
  178.      * @throws Pack200Exception if there is a problem encoding any of the values
  179.      */
  180.     public byte[] encode(final int[] ints) throws Pack200Exception {
  181.         int total = 0;
  182.         final byte[][] bytes = new byte[ints.length][];
  183.         for (int i = 0; i < ints.length; i++) {
  184.             bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0);
  185.             total += bytes[i].length;
  186.         }
  187.         final byte[] encoded = new byte[total];
  188.         int index = 0;
  189.         for (final byte[] element : bytes) {
  190.             System.arraycopy(element, 0, encoded, index, element.length);
  191.             index += element.length;
  192.         }
  193.         return encoded;
  194.     }
  195. }