View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import org.apache.commons.codec.BinaryDecoder;
21  import org.apache.commons.codec.BinaryEncoder;
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  
25  /**
26   * Converts between byte arrays and strings of "0"s and "1"s.
27   *
28   * <p>
29   * This class is immutable and thread-safe.
30   * </p>
31   *
32   * TODO: may want to add more bit vector functions like and/or/xor/nand TODO: also might be good to generate boolean[] from byte[] et cetera.
33   *
34   * @since 1.3
35   */
36  public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
37  
38      /*
39       * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth it.
40       */
41  
42      /** Empty char array. */
43      private static final char[] EMPTY_CHAR_ARRAY = {};
44  
45      /** Empty byte array. */
46      private static final byte[] EMPTY_BYTE_ARRAY = {};
47  
48      /** Mask for bit 0 of a byte. */
49      private static final int BIT_0 = 1;
50  
51      /** Mask for bit 1 of a byte. */
52      private static final int BIT_1 = 0x02;
53  
54      /** Mask for bit 2 of a byte. */
55      private static final int BIT_2 = 0x04;
56  
57      /** Mask for bit 3 of a byte. */
58      private static final int BIT_3 = 0x08;
59  
60      /** Mask for bit 4 of a byte. */
61      private static final int BIT_4 = 0x10;
62  
63      /** Mask for bit 5 of a byte. */
64      private static final int BIT_5 = 0x20;
65  
66      /** Mask for bit 6 of a byte. */
67      private static final int BIT_6 = 0x40;
68  
69      /** Mask for bit 7 of a byte. */
70      private static final int BIT_7 = 0x80;
71  
72      private static final int[] BITS = { BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7 };
73  
74      /**
75       * Decodes a byte array where each byte represents an ASCII '0' or '1'.
76       *
77       * @param ascii each byte represents an ASCII '0' or '1'.
78       * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
79       */
80      public static byte[] fromAscii(final byte[] ascii) {
81          if (isEmpty(ascii)) {
82              return EMPTY_BYTE_ARRAY;
83          }
84          final int asciiLength = ascii.length;
85          // get length/8 times bytes with 3 bit shifts to the right of the length
86          final byte[] raw = new byte[asciiLength >> 3];
87          /*
88           * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
89           */
90          for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
91              for (int bits = 0; bits < BITS.length; ++bits) {
92                  if (ascii[jj - bits] == '1') {
93                      raw[ii] |= BITS[bits];
94                  }
95              }
96          }
97          return raw;
98      }
99  
100     /**
101      * Decodes a char array where each char represents an ASCII '0' or '1'.
102      *
103      * @param ascii each char represents an ASCII '0' or '1'.
104      * @return the raw encoded binary where each bit corresponds to a char in the char array argument.
105      */
106     public static byte[] fromAscii(final char[] ascii) {
107         if (ascii == null || ascii.length == 0) {
108             return EMPTY_BYTE_ARRAY;
109         }
110         final int asciiLength = ascii.length;
111         // get length/8 times bytes with 3 bit shifts to the right of the length
112         final byte[] raw = new byte[asciiLength >> 3];
113         /*
114          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
115          */
116         for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
117             for (int bits = 0; bits < BITS.length; ++bits) {
118                 if (ascii[jj - bits] == '1') {
119                     raw[ii] |= BITS[bits];
120                 }
121             }
122         }
123         return raw;
124     }
125 
126     /**
127      * Returns {@code true} if the given array is {@code null} or empty (size 0.)
128      *
129      * @param array the source array.
130      * @return {@code true} if the given array is {@code null} or empty (size 0.)
131      */
132     static boolean isEmpty(final byte[] array) {
133         return array == null || array.length == 0;
134     }
135 
136     /**
137      * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated char.
138      *
139      * @param raw the raw binary data to convert.
140      * @return an array of 0 and 1 character bytes for each bit of the argument.
141      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
142      */
143     public static byte[] toAsciiBytes(final byte[] raw) {
144         if (isEmpty(raw)) {
145             return EMPTY_BYTE_ARRAY;
146         }
147         final int rawLength = raw.length;
148         // get 8 times the bytes with 3 bit shifts to the left of the length
149         final byte[] ascii = new byte[rawLength << 3];
150         /*
151          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
152          */
153         for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
154             for (int bits = 0; bits < BITS.length; ++bits) {
155                 if ((raw[ii] & BITS[bits]) == 0) {
156                     ascii[jj - bits] = '0';
157                 } else {
158                     ascii[jj - bits] = '1';
159                 }
160             }
161         }
162         return ascii;
163     }
164 
165     /**
166      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
167      *
168      * @param raw the raw binary data to convert.
169      * @return an array of 0 and 1 characters for each bit of the argument.
170      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
171      */
172     public static char[] toAsciiChars(final byte[] raw) {
173         if (isEmpty(raw)) {
174             return EMPTY_CHAR_ARRAY;
175         }
176         final int rawLength = raw.length;
177         // get 8 times the bytes with 3 bit shifts to the left of the length
178         final char[] ascii = new char[rawLength << 3];
179         /*
180          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
181          */
182         for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
183             for (int bits = 0; bits < BITS.length; ++bits) {
184                 if ((raw[ii] & BITS[bits]) == 0) {
185                     ascii[jj - bits] = '0';
186                 } else {
187                     ascii[jj - bits] = '1';
188                 }
189             }
190         }
191         return ascii;
192     }
193 
194     /**
195      * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
196      *
197      * @param raw the raw binary data to convert.
198      * @return a String of 0 and 1 characters representing the binary data.
199      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
200      */
201     public static String toAsciiString(final byte[] raw) {
202         return new String(toAsciiChars(raw));
203     }
204 
205     /**
206      * Constructs a new instance.
207      */
208     public BinaryCodec() {
209         // empty
210     }
211 
212     /**
213      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
214      *
215      * @param ascii each byte represents an ASCII '0' or '1'.
216      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
217      * @see org.apache.commons.codec.Decoder#decode(Object)
218      */
219     @Override
220     public byte[] decode(final byte[] ascii) {
221         return fromAscii(ascii);
222     }
223 
224     /**
225      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
226      *
227      * @param ascii each byte represents an ASCII '0' or '1'.
228      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
229      * @throws DecoderException if argument is not a byte[], char[] or String.
230      * @see org.apache.commons.codec.Decoder#decode(Object)
231      */
232     @Override
233     public Object decode(final Object ascii) throws DecoderException {
234         if (ascii == null) {
235             return EMPTY_BYTE_ARRAY;
236         }
237         if (ascii instanceof byte[]) {
238             return fromAscii((byte[]) ascii);
239         }
240         if (ascii instanceof char[]) {
241             return fromAscii((char[]) ascii);
242         }
243         if (ascii instanceof String) {
244             return fromAscii(((String) ascii).toCharArray());
245         }
246         throw new DecoderException("argument not a byte array");
247     }
248 
249     /**
250      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
251      *
252      * @param raw the raw binary data to convert.
253      * @return 0 and 1 ASCII character bytes one for each bit of the argument.
254      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
255      */
256     @Override
257     public byte[] encode(final byte[] raw) {
258         return toAsciiBytes(raw);
259     }
260 
261     /**
262      * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
263      *
264      * @param raw the raw binary data to convert.
265      * @return 0 and 1 ASCII character chars one for each bit of the argument.
266      * @throws EncoderException if the argument is not a byte[].
267      * @see org.apache.commons.codec.Encoder#encode(Object)
268      */
269     @Override
270     public Object encode(final Object raw) throws EncoderException {
271         if (!(raw instanceof byte[])) {
272             throw new EncoderException("argument not a byte array");
273         }
274         return toAsciiChars((byte[]) raw);
275     }
276 
277     /**
278      * Decodes a String where each char of the String represents an ASCII '0' or '1'.
279      *
280      * @param ascii String of '0' and '1' characters.
281      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
282      * @see org.apache.commons.codec.Decoder#decode(Object)
283      */
284     public byte[] toByteArray(final String ascii) {
285         if (ascii == null) {
286             return EMPTY_BYTE_ARRAY;
287         }
288         return fromAscii(ascii.toCharArray());
289     }
290 }