001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.harmony.pack200; 020 021import java.io.IOException; 022import java.io.InputStream; 023 024import org.apache.commons.io.input.BoundedInputStream; 025 026/** 027 * A Codec allows a sequence of bytes to be decoded into integer values (or vice versa). 028 * <p> 029 * There are a number of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) that are used in the implementation of many 030 * bands; but there are a variety of other ones, and indeed the specification assumes that other combinations of values can result in more specific and 031 * efficient formats. There are also a sequence of canonical encodings defined by the Pack200 specification, which allow a Codec to be referred to by canonical 032 * number. {@link CodecEncoding#getCodec(int, InputStream, Codec)}) 033 * </p> 034 */ 035public abstract class Codec { 036 037 /** 038 * BCI5 = (5,4): Used for storing branching information in bytecode. 039 */ 040 public static final BHSDCodec BCI5 = new BHSDCodec(5, 4); 041 042 /** 043 * BRANCH5 = (5,4,2): Used for storing branching information in bytecode. 044 */ 045 public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2); 046 047 /** 048 * BYTE1 = (1,256): Used for storing plain bytes. 049 */ 050 public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256); 051 052 /** 053 * CHAR3 = (3,128): Used for storing text (UTF-8) strings. This isn't quite the same as UTF-8, but has similar properties; ASCII characters < 127 are 054 * stored in a single byte. 055 */ 056 public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128); 057 058 /** 059 * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of signed values. 060 */ 061 public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1); 062 063 /** 064 * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of signed values, but where most of them are 065 * expected to be non-negative. 066 */ 067 public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1); 068 069 /** 070 * SIGNED5 = (5,64,1): Used for small signed values. 071 */ 072 public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1); 073 074 /** 075 * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of unsigned values. 076 */ 077 public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1); 078 079 /** 080 * UNSIGNED5 = (5,64): Used for small unsigned values. 081 */ 082 public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64); 083 084 public int lastBandLength; 085 086 int check(final int n, final InputStream in) throws Pack200Exception { 087 if (in instanceof BoundedInputStream) { 088 final BoundedInputStream bin = (BoundedInputStream) in; 089 final long count = bin.getCount(); 090 final long maxLength = bin.getMaxCount(); 091 if (maxLength > -1) { 092 final long remaining = maxLength - count; 093 final String format = "Can't read beyond end of stream (n = %,d, count = %,d, maxLength = %,d, remaining = %,d)"; 094 if (count < -1 || n > remaining) { 095 throw new Pack200Exception(String.format(format, n, count, maxLength, remaining)); 096 } 097 } 098 } 099 return n; 100 } 101 102 /** 103 * Decodes a sequence of bytes from the given input stream, returning the value as a long. Note that this method can only be applied for non-delta 104 * encodings. 105 * 106 * @param in the input stream to read from 107 * @return the value as a long 108 * @throws IOException if there is a problem reading from the underlying input stream 109 * @throws Pack200Exception if the encoding is a delta encoding 110 */ 111 public abstract int decode(InputStream in) throws IOException, Pack200Exception; 112 113 /** 114 * Decodes a sequence of bytes from the given input stream, returning the value as a long. If this encoding is a delta encoding (d=1) then the previous 115 * value must be passed in as a parameter. If it is a non-delta encoding, then it does not matter what value is passed in, so it makes sense for the value 116 * to be passed in by default using code similar to: 117 * 118 * <pre> 119 * long last = 0; 120 * while (condition) { 121 * last = codec.decode(in, last); 122 * // do something with last 123 * } 124 * </pre> 125 * 126 * @param in the input stream to read from 127 * @param last the previous value read, which must be supplied if the codec is a delta encoding 128 * @return the value as a long 129 * @throws IOException if there is a problem reading from the underlying input stream 130 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid 131 */ 132 public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception; 133 134 /** 135 * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since some codecs (such as {@link PopulationCodec}) 136 * only work when the number of values to be read is known. 137 * 138 * @param n the number of values to decode 139 * @param in the input stream to read from 140 * @return an array of {@code int} values corresponding to values decoded 141 * @throws IOException if there is a problem reading from the underlying input stream 142 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid 143 */ 144 public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception { 145 lastBandLength = 0; 146 final int[] result = new int[check(n, in)]; 147 int last = 0; 148 for (int i = 0; i < n; i++) { 149 result[i] = last = decode(in, last); 150 } 151 return result; 152 } 153 154 /** 155 * Decodes a sequence of {@code n} values from {@code in}. 156 * 157 * @param n the number of values to decode 158 * @param in the input stream to read from 159 * @param firstValue the first value in the band if it has already been read 160 * @return an array of {@code int} values corresponding to values decoded, with firstValue as the first value in the array. 161 * @throws IOException if there is a problem reading from the underlying input stream 162 * @throws Pack200Exception if there is a problem decoding the value or that the value is invalid 163 */ 164 public int[] decodeInts(final int n, final InputStream in, final int firstValue) throws IOException, Pack200Exception { 165 final int[] result = new int[check(n, in) + 1]; 166 result[0] = firstValue; 167 int last = firstValue; 168 for (int i = 1; i < n + 1; i++) { 169 result[i] = last = decode(in, last); 170 } 171 return result; 172 } 173 174 /** 175 * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta encodings. 176 * 177 * @param value the value to encode. 178 * @return the encoded bytes. 179 * @throws Pack200Exception If a Pack200 semantic error occurs. 180 */ 181 public abstract byte[] encode(int value) throws Pack200Exception; 182 183 /** 184 * Encodes a single value into a sequence of bytes. 185 * 186 * @param value the value to encode. 187 * @param last the previous value encoded (for delta encodings). 188 * @return the encoded bytes. 189 * @throws Pack200Exception If a Pack200 semantic error occurs. 190 */ 191 public abstract byte[] encode(int value, int last) throws Pack200Exception; 192 193 /** 194 * Encodes a sequence of integers into a byte array. 195 * 196 * @param ints the values to encode. 197 * @return byte[] encoded bytes. 198 * @throws Pack200Exception if there is a problem encoding any of the values. 199 */ 200 public byte[] encode(final int[] ints) throws Pack200Exception { 201 int total = 0; 202 final byte[][] bytes = new byte[ints.length][]; 203 for (int i = 0; i < ints.length; i++) { 204 bytes[i] = encode(ints[i], i > 0 ? ints[i - 1] : 0); 205 total += bytes[i].length; 206 } 207 final byte[] encoded = new byte[total]; 208 int index = 0; 209 for (final byte[] element : bytes) { 210 System.arraycopy(element, 0, encoded, index, element.length); 211 index += element.length; 212 } 213 return encoded; 214 } 215}