001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import org.apache.commons.codec.BinaryDecoder;
021 import org.apache.commons.codec.BinaryEncoder;
022 import org.apache.commons.codec.DecoderException;
023 import org.apache.commons.codec.EncoderException;
024
025 /**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 * @version $Id: BinaryCodec.html 889935 2013-12-11 05:05:13Z ggregory $
035 */
036 public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
037 /*
038 * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
039 * it.
040 */
041 /** Empty char array. */
042 private static final char[] EMPTY_CHAR_ARRAY = new char[0];
043
044 /** Empty byte array. */
045 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
046
047 /** Mask for bit 0 of a byte. */
048 private static final int BIT_0 = 1;
049
050 /** Mask for bit 1 of a byte. */
051 private static final int BIT_1 = 0x02;
052
053 /** Mask for bit 2 of a byte. */
054 private static final int BIT_2 = 0x04;
055
056 /** Mask for bit 3 of a byte. */
057 private static final int BIT_3 = 0x08;
058
059 /** Mask for bit 4 of a byte. */
060 private static final int BIT_4 = 0x10;
061
062 /** Mask for bit 5 of a byte. */
063 private static final int BIT_5 = 0x20;
064
065 /** Mask for bit 6 of a byte. */
066 private static final int BIT_6 = 0x40;
067
068 /** Mask for bit 7 of a byte. */
069 private static final int BIT_7 = 0x80;
070
071 private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
072
073 /**
074 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
075 *
076 * @param raw
077 * the raw binary data to convert
078 * @return 0 and 1 ASCII character bytes one for each bit of the argument
079 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
080 */
081 @Override
082 public byte[] encode(byte[] raw) {
083 return toAsciiBytes(raw);
084 }
085
086 /**
087 * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
088 *
089 * @param raw
090 * the raw binary data to convert
091 * @return 0 and 1 ASCII character chars one for each bit of the argument
092 * @throws EncoderException
093 * if the argument is not a byte[]
094 * @see org.apache.commons.codec.Encoder#encode(Object)
095 */
096 @Override
097 public Object encode(Object raw) throws EncoderException {
098 if (!(raw instanceof byte[])) {
099 throw new EncoderException("argument not a byte array");
100 }
101 return toAsciiChars((byte[]) raw);
102 }
103
104 /**
105 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
106 *
107 * @param ascii
108 * each byte represents an ASCII '0' or '1'
109 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
110 * @throws DecoderException
111 * if argument is not a byte[], char[] or String
112 * @see org.apache.commons.codec.Decoder#decode(Object)
113 */
114 @Override
115 public Object decode(Object ascii) throws DecoderException {
116 if (ascii == null) {
117 return EMPTY_BYTE_ARRAY;
118 }
119 if (ascii instanceof byte[]) {
120 return fromAscii((byte[]) ascii);
121 }
122 if (ascii instanceof char[]) {
123 return fromAscii((char[]) ascii);
124 }
125 if (ascii instanceof String) {
126 return fromAscii(((String) ascii).toCharArray());
127 }
128 throw new DecoderException("argument not a byte array");
129 }
130
131 /**
132 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
133 *
134 * @param ascii
135 * each byte represents an ASCII '0' or '1'
136 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
137 * @see org.apache.commons.codec.Decoder#decode(Object)
138 */
139 @Override
140 public byte[] decode(byte[] ascii) {
141 return fromAscii(ascii);
142 }
143
144 /**
145 * Decodes a String where each char of the String represents an ASCII '0' or '1'.
146 *
147 * @param ascii
148 * String of '0' and '1' characters
149 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
150 * @see org.apache.commons.codec.Decoder#decode(Object)
151 */
152 public byte[] toByteArray(String ascii) {
153 if (ascii == null) {
154 return EMPTY_BYTE_ARRAY;
155 }
156 return fromAscii(ascii.toCharArray());
157 }
158
159 // ------------------------------------------------------------------------
160 //
161 // static codec operations
162 //
163 // ------------------------------------------------------------------------
164 /**
165 * Decodes a char array where each char represents an ASCII '0' or '1'.
166 *
167 * @param ascii
168 * each char represents an ASCII '0' or '1'
169 * @return the raw encoded binary where each bit corresponds to a char in the char array argument
170 */
171 public static byte[] fromAscii(char[] ascii) {
172 if (ascii == null || ascii.length == 0) {
173 return EMPTY_BYTE_ARRAY;
174 }
175 // get length/8 times bytes with 3 bit shifts to the right of the length
176 byte[] l_raw = new byte[ascii.length >> 3];
177 /*
178 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
179 * loop.
180 */
181 for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
182 for (int bits = 0; bits < BITS.length; ++bits) {
183 if (ascii[jj - bits] == '1') {
184 l_raw[ii] |= BITS[bits];
185 }
186 }
187 }
188 return l_raw;
189 }
190
191 /**
192 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
193 *
194 * @param ascii
195 * each byte represents an ASCII '0' or '1'
196 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
197 */
198 public static byte[] fromAscii(byte[] ascii) {
199 if (isEmpty(ascii)) {
200 return EMPTY_BYTE_ARRAY;
201 }
202 // get length/8 times bytes with 3 bit shifts to the right of the length
203 byte[] l_raw = new byte[ascii.length >> 3];
204 /*
205 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
206 * loop.
207 */
208 for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
209 for (int bits = 0; bits < BITS.length; ++bits) {
210 if (ascii[jj - bits] == '1') {
211 l_raw[ii] |= BITS[bits];
212 }
213 }
214 }
215 return l_raw;
216 }
217
218 /**
219 * Returns {@code true} if the given array is {@code null} or empty (size 0.)
220 *
221 * @param array
222 * the source array
223 * @return {@code true} if the given array is {@code null} or empty (size 0.)
224 */
225 private static boolean isEmpty(byte[] array) {
226 return array == null || array.length == 0;
227 }
228
229 /**
230 * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
231 * char.
232 *
233 * @param raw
234 * the raw binary data to convert
235 * @return an array of 0 and 1 character bytes for each bit of the argument
236 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
237 */
238 public static byte[] toAsciiBytes(byte[] raw) {
239 if (isEmpty(raw)) {
240 return EMPTY_BYTE_ARRAY;
241 }
242 // get 8 times the bytes with 3 bit shifts to the left of the length
243 byte[] l_ascii = new byte[raw.length << 3];
244 /*
245 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
246 * loop.
247 */
248 for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
249 for (int bits = 0; bits < BITS.length; ++bits) {
250 if ((raw[ii] & BITS[bits]) == 0) {
251 l_ascii[jj - bits] = '0';
252 } else {
253 l_ascii[jj - bits] = '1';
254 }
255 }
256 }
257 return l_ascii;
258 }
259
260 /**
261 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
262 *
263 * @param raw
264 * the raw binary data to convert
265 * @return an array of 0 and 1 characters for each bit of the argument
266 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
267 */
268 public static char[] toAsciiChars(byte[] raw) {
269 if (isEmpty(raw)) {
270 return EMPTY_CHAR_ARRAY;
271 }
272 // get 8 times the bytes with 3 bit shifts to the left of the length
273 char[] l_ascii = new char[raw.length << 3];
274 /*
275 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
276 * loop.
277 */
278 for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
279 for (int bits = 0; bits < BITS.length; ++bits) {
280 if ((raw[ii] & BITS[bits]) == 0) {
281 l_ascii[jj - bits] = '0';
282 } else {
283 l_ascii[jj - bits] = '1';
284 }
285 }
286 }
287 return l_ascii;
288 }
289
290 /**
291 * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
292 *
293 * @param raw
294 * the raw binary data to convert
295 * @return a String of 0 and 1 characters representing the binary data
296 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
297 */
298 public static String toAsciiString(byte[] raw) {
299 return new String(toAsciiChars(raw));
300 }
301 }