1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import org.apache.commons.codec.BinaryDecoder;
21 import org.apache.commons.codec.BinaryEncoder;
22 import org.apache.commons.codec.DecoderException;
23 import org.apache.commons.codec.EncoderException;
24
25 /**
26 * Converts between byte arrays and strings of "0"s and "1"s.
27 *
28 * <p>
29 * This class is immutable and thread-safe.
30 * </p>
31 *
32 * TODO: may want to add more bit vector functions like and/or/xor/nand TODO: also might be good to generate boolean[] from byte[] et cetera.
33 *
34 * @since 1.3
35 */
36 public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
37
38 /*
39 * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth it.
40 */
41
42 /** Empty char array. */
43 private static final char[] EMPTY_CHAR_ARRAY = {};
44
45 /** Empty byte array. */
46 private static final byte[] EMPTY_BYTE_ARRAY = {};
47
48 /** Mask for bit 0 of a byte. */
49 private static final int BIT_0 = 1;
50
51 /** Mask for bit 1 of a byte. */
52 private static final int BIT_1 = 0x02;
53
54 /** Mask for bit 2 of a byte. */
55 private static final int BIT_2 = 0x04;
56
57 /** Mask for bit 3 of a byte. */
58 private static final int BIT_3 = 0x08;
59
60 /** Mask for bit 4 of a byte. */
61 private static final int BIT_4 = 0x10;
62
63 /** Mask for bit 5 of a byte. */
64 private static final int BIT_5 = 0x20;
65
66 /** Mask for bit 6 of a byte. */
67 private static final int BIT_6 = 0x40;
68
69 /** Mask for bit 7 of a byte. */
70 private static final int BIT_7 = 0x80;
71
72 private static final int[] BITS = { BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7 };
73
74 /**
75 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
76 *
77 * @param ascii each byte represents an ASCII '0' or '1'.
78 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
79 */
80 public static byte[] fromAscii(final byte[] ascii) {
81 if (isEmpty(ascii)) {
82 return EMPTY_BYTE_ARRAY;
83 }
84 final int asciiLength = ascii.length;
85 // get length/8 times bytes with 3 bit shifts to the right of the length
86 final byte[] raw = new byte[asciiLength >> 3];
87 /*
88 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
89 */
90 for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
91 for (int bits = 0; bits < BITS.length; ++bits) {
92 if (ascii[jj - bits] == '1') {
93 raw[ii] |= BITS[bits];
94 }
95 }
96 }
97 return raw;
98 }
99
100 /**
101 * Decodes a char array where each char represents an ASCII '0' or '1'.
102 *
103 * @param ascii each char represents an ASCII '0' or '1'.
104 * @return the raw encoded binary where each bit corresponds to a char in the char array argument.
105 */
106 public static byte[] fromAscii(final char[] ascii) {
107 if (ascii == null || ascii.length == 0) {
108 return EMPTY_BYTE_ARRAY;
109 }
110 final int asciiLength = ascii.length;
111 // get length/8 times bytes with 3 bit shifts to the right of the length
112 final byte[] raw = new byte[asciiLength >> 3];
113 /*
114 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
115 */
116 for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
117 for (int bits = 0; bits < BITS.length; ++bits) {
118 if (ascii[jj - bits] == '1') {
119 raw[ii] |= BITS[bits];
120 }
121 }
122 }
123 return raw;
124 }
125
126 /**
127 * Returns {@code true} if the given array is {@code null} or empty (size 0.)
128 *
129 * @param array the source array.
130 * @return {@code true} if the given array is {@code null} or empty (size 0.)
131 */
132 static boolean isEmpty(final byte[] array) {
133 return array == null || array.length == 0;
134 }
135
136 /**
137 * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated char.
138 *
139 * @param raw the raw binary data to convert.
140 * @return an array of 0 and 1 character bytes for each bit of the argument.
141 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
142 */
143 public static byte[] toAsciiBytes(final byte[] raw) {
144 if (isEmpty(raw)) {
145 return EMPTY_BYTE_ARRAY;
146 }
147 final int rawLength = raw.length;
148 // get 8 times the bytes with 3 bit shifts to the left of the length
149 final byte[] ascii = new byte[rawLength << 3];
150 /*
151 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
152 */
153 for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
154 for (int bits = 0; bits < BITS.length; ++bits) {
155 if ((raw[ii] & BITS[bits]) == 0) {
156 ascii[jj - bits] = '0';
157 } else {
158 ascii[jj - bits] = '1';
159 }
160 }
161 }
162 return ascii;
163 }
164
165 /**
166 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
167 *
168 * @param raw the raw binary data to convert.
169 * @return an array of 0 and 1 characters for each bit of the argument.
170 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
171 */
172 public static char[] toAsciiChars(final byte[] raw) {
173 if (isEmpty(raw)) {
174 return EMPTY_CHAR_ARRAY;
175 }
176 final int rawLength = raw.length;
177 // get 8 times the bytes with 3 bit shifts to the left of the length
178 final char[] ascii = new char[rawLength << 3];
179 /*
180 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
181 */
182 for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
183 for (int bits = 0; bits < BITS.length; ++bits) {
184 if ((raw[ii] & BITS[bits]) == 0) {
185 ascii[jj - bits] = '0';
186 } else {
187 ascii[jj - bits] = '1';
188 }
189 }
190 }
191 return ascii;
192 }
193
194 /**
195 * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
196 *
197 * @param raw the raw binary data to convert.
198 * @return a String of 0 and 1 characters representing the binary data.
199 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
200 */
201 public static String toAsciiString(final byte[] raw) {
202 return new String(toAsciiChars(raw));
203 }
204
205 /**
206 * Constructs a new instance.
207 */
208 public BinaryCodec() {
209 // empty
210 }
211
212 /**
213 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
214 *
215 * @param ascii each byte represents an ASCII '0' or '1'.
216 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
217 * @see org.apache.commons.codec.Decoder#decode(Object)
218 */
219 @Override
220 public byte[] decode(final byte[] ascii) {
221 return fromAscii(ascii);
222 }
223
224 /**
225 * Decodes a byte array where each byte represents an ASCII '0' or '1'.
226 *
227 * @param ascii each byte represents an ASCII '0' or '1'.
228 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
229 * @throws DecoderException if argument is not a byte[], char[] or String.
230 * @see org.apache.commons.codec.Decoder#decode(Object)
231 */
232 @Override
233 public Object decode(final Object ascii) throws DecoderException {
234 if (ascii == null) {
235 return EMPTY_BYTE_ARRAY;
236 }
237 if (ascii instanceof byte[]) {
238 return fromAscii((byte[]) ascii);
239 }
240 if (ascii instanceof char[]) {
241 return fromAscii((char[]) ascii);
242 }
243 if (ascii instanceof String) {
244 return fromAscii(((String) ascii).toCharArray());
245 }
246 throw new DecoderException("argument not a byte array");
247 }
248
249 /**
250 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
251 *
252 * @param raw the raw binary data to convert.
253 * @return 0 and 1 ASCII character bytes one for each bit of the argument.
254 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
255 */
256 @Override
257 public byte[] encode(final byte[] raw) {
258 return toAsciiBytes(raw);
259 }
260
261 /**
262 * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
263 *
264 * @param raw the raw binary data to convert.
265 * @return 0 and 1 ASCII character chars one for each bit of the argument.
266 * @throws EncoderException if the argument is not a byte[].
267 * @see org.apache.commons.codec.Encoder#encode(Object)
268 */
269 @Override
270 public Object encode(final Object raw) throws EncoderException {
271 if (!(raw instanceof byte[])) {
272 throw new EncoderException("argument not a byte array");
273 }
274 return toAsciiChars((byte[]) raw);
275 }
276
277 /**
278 * Decodes a String where each char of the String represents an ASCII '0' or '1'.
279 *
280 * @param ascii String of '0' and '1' characters.
281 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
282 * @see org.apache.commons.codec.Decoder#decode(Object)
283 */
284 public byte[] toByteArray(final String ascii) {
285 if (ascii == null) {
286 return EMPTY_BYTE_ARRAY;
287 }
288 return fromAscii(ascii.toCharArray());
289 }
290 }