View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import org.apache.commons.codec.BinaryDecoder;
21  import org.apache.commons.codec.BinaryEncoder;
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  
25  /**
26   * Converts between byte arrays and strings of "0"s and "1"s.
27   *
28   * <p>This class is immutable and thread-safe.</p>
29   *
30   * TODO: may want to add more bit vector functions like and/or/xor/nand
31   * TODO: also might be good to generate boolean[] from byte[] et cetera.
32   *
33   * @since 1.3
34   * @version $Id: BinaryCodec.html 889935 2013-12-11 05:05:13Z ggregory $
35   */
36  public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
37      /*
38       * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
39       * it.
40       */
41      /** Empty char array. */
42      private static final char[] EMPTY_CHAR_ARRAY = new char[0];
43  
44      /** Empty byte array. */
45      private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
46  
47      /** Mask for bit 0 of a byte. */
48      private static final int BIT_0 = 1;
49  
50      /** Mask for bit 1 of a byte. */
51      private static final int BIT_1 = 0x02;
52  
53      /** Mask for bit 2 of a byte. */
54      private static final int BIT_2 = 0x04;
55  
56      /** Mask for bit 3 of a byte. */
57      private static final int BIT_3 = 0x08;
58  
59      /** Mask for bit 4 of a byte. */
60      private static final int BIT_4 = 0x10;
61  
62      /** Mask for bit 5 of a byte. */
63      private static final int BIT_5 = 0x20;
64  
65      /** Mask for bit 6 of a byte. */
66      private static final int BIT_6 = 0x40;
67  
68      /** Mask for bit 7 of a byte. */
69      private static final int BIT_7 = 0x80;
70  
71      private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
72  
73      /**
74       * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
75       *
76       * @param raw
77       *                  the raw binary data to convert
78       * @return 0 and 1 ASCII character bytes one for each bit of the argument
79       * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
80       */
81      @Override
82      public byte[] encode(byte[] raw) {
83          return toAsciiBytes(raw);
84      }
85  
86      /**
87       * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
88       *
89       * @param raw
90       *                  the raw binary data to convert
91       * @return 0 and 1 ASCII character chars one for each bit of the argument
92       * @throws EncoderException
93       *                  if the argument is not a byte[]
94       * @see org.apache.commons.codec.Encoder#encode(Object)
95       */
96      @Override
97      public Object encode(Object raw) throws EncoderException {
98          if (!(raw instanceof byte[])) {
99              throw new EncoderException("argument not a byte array");
100         }
101         return toAsciiChars((byte[]) raw);
102     }
103 
104     /**
105      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
106      *
107      * @param ascii
108      *                  each byte represents an ASCII '0' or '1'
109      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
110      * @throws DecoderException
111      *                  if argument is not a byte[], char[] or String
112      * @see org.apache.commons.codec.Decoder#decode(Object)
113      */
114     @Override
115     public Object decode(Object ascii) throws DecoderException {
116         if (ascii == null) {
117             return EMPTY_BYTE_ARRAY;
118         }
119         if (ascii instanceof byte[]) {
120             return fromAscii((byte[]) ascii);
121         }
122         if (ascii instanceof char[]) {
123             return fromAscii((char[]) ascii);
124         }
125         if (ascii instanceof String) {
126             return fromAscii(((String) ascii).toCharArray());
127         }
128         throw new DecoderException("argument not a byte array");
129     }
130 
131     /**
132      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
133      *
134      * @param ascii
135      *                  each byte represents an ASCII '0' or '1'
136      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
137      * @see org.apache.commons.codec.Decoder#decode(Object)
138      */
139     @Override
140     public byte[] decode(byte[] ascii) {
141         return fromAscii(ascii);
142     }
143 
144     /**
145      * Decodes a String where each char of the String represents an ASCII '0' or '1'.
146      *
147      * @param ascii
148      *                  String of '0' and '1' characters
149      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
150      * @see org.apache.commons.codec.Decoder#decode(Object)
151      */
152     public byte[] toByteArray(String ascii) {
153         if (ascii == null) {
154             return EMPTY_BYTE_ARRAY;
155         }
156         return fromAscii(ascii.toCharArray());
157     }
158 
159     // ------------------------------------------------------------------------
160     //
161     // static codec operations
162     //
163     // ------------------------------------------------------------------------
164     /**
165      * Decodes a char array where each char represents an ASCII '0' or '1'.
166      *
167      * @param ascii
168      *                  each char represents an ASCII '0' or '1'
169      * @return the raw encoded binary where each bit corresponds to a char in the char array argument
170      */
171     public static byte[] fromAscii(char[] ascii) {
172         if (ascii == null || ascii.length == 0) {
173             return EMPTY_BYTE_ARRAY;
174         }
175         // get length/8 times bytes with 3 bit shifts to the right of the length
176         byte[] l_raw = new byte[ascii.length >> 3];
177         /*
178          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
179          * loop.
180          */
181         for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
182             for (int bits = 0; bits < BITS.length; ++bits) {
183                 if (ascii[jj - bits] == '1') {
184                     l_raw[ii] |= BITS[bits];
185                 }
186             }
187         }
188         return l_raw;
189     }
190 
191     /**
192      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
193      *
194      * @param ascii
195      *                  each byte represents an ASCII '0' or '1'
196      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
197      */
198     public static byte[] fromAscii(byte[] ascii) {
199         if (isEmpty(ascii)) {
200             return EMPTY_BYTE_ARRAY;
201         }
202         // get length/8 times bytes with 3 bit shifts to the right of the length
203         byte[] l_raw = new byte[ascii.length >> 3];
204         /*
205          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
206          * loop.
207          */
208         for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
209             for (int bits = 0; bits < BITS.length; ++bits) {
210                 if (ascii[jj - bits] == '1') {
211                     l_raw[ii] |= BITS[bits];
212                 }
213             }
214         }
215         return l_raw;
216     }
217 
218     /**
219      * Returns {@code true} if the given array is {@code null} or empty (size 0.)
220      *
221      * @param array
222      *            the source array
223      * @return {@code true} if the given array is {@code null} or empty (size 0.)
224      */
225     private static boolean isEmpty(byte[] array) {
226         return array == null || array.length == 0;
227     }
228 
229     /**
230      * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
231      * char.
232      *
233      * @param raw
234      *                  the raw binary data to convert
235      * @return an array of 0 and 1 character bytes for each bit of the argument
236      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
237      */
238     public static byte[] toAsciiBytes(byte[] raw) {
239         if (isEmpty(raw)) {
240             return EMPTY_BYTE_ARRAY;
241         }
242         // get 8 times the bytes with 3 bit shifts to the left of the length
243         byte[] l_ascii = new byte[raw.length << 3];
244         /*
245          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
246          * loop.
247          */
248         for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
249             for (int bits = 0; bits < BITS.length; ++bits) {
250                 if ((raw[ii] & BITS[bits]) == 0) {
251                     l_ascii[jj - bits] = '0';
252                 } else {
253                     l_ascii[jj - bits] = '1';
254                 }
255             }
256         }
257         return l_ascii;
258     }
259 
260     /**
261      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
262      *
263      * @param raw
264      *                  the raw binary data to convert
265      * @return an array of 0 and 1 characters for each bit of the argument
266      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
267      */
268     public static char[] toAsciiChars(byte[] raw) {
269         if (isEmpty(raw)) {
270             return EMPTY_CHAR_ARRAY;
271         }
272         // get 8 times the bytes with 3 bit shifts to the left of the length
273         char[] l_ascii = new char[raw.length << 3];
274         /*
275          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
276          * loop.
277          */
278         for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
279             for (int bits = 0; bits < BITS.length; ++bits) {
280                 if ((raw[ii] & BITS[bits]) == 0) {
281                     l_ascii[jj - bits] = '0';
282                 } else {
283                     l_ascii[jj - bits] = '1';
284                 }
285             }
286         }
287         return l_ascii;
288     }
289 
290     /**
291      * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
292      *
293      * @param raw
294      *                  the raw binary data to convert
295      * @return a String of 0 and 1 characters representing the binary data
296      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
297      */
298     public static String toAsciiString(byte[] raw) {
299         return new String(toAsciiChars(raw));
300     }
301 }