001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import org.apache.commons.codec.BinaryDecoder;
021    import org.apache.commons.codec.BinaryEncoder;
022    import org.apache.commons.codec.DecoderException;
023    import org.apache.commons.codec.EncoderException;
024    
025    /**
026     * Converts between byte arrays and strings of "0"s and "1"s.
027     *
028     * <p>This class is immutable and thread-safe.</p>
029     *
030     * TODO: may want to add more bit vector functions like and/or/xor/nand
031     * TODO: also might be good to generate boolean[] from byte[] et cetera.
032     *
033     * @since 1.3
034     * @version $Id: BinaryCodec.html 889935 2013-12-11 05:05:13Z ggregory $
035     */
036    public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
037        /*
038         * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
039         * it.
040         */
041        /** Empty char array. */
042        private static final char[] EMPTY_CHAR_ARRAY = new char[0];
043    
044        /** Empty byte array. */
045        private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
046    
047        /** Mask for bit 0 of a byte. */
048        private static final int BIT_0 = 1;
049    
050        /** Mask for bit 1 of a byte. */
051        private static final int BIT_1 = 0x02;
052    
053        /** Mask for bit 2 of a byte. */
054        private static final int BIT_2 = 0x04;
055    
056        /** Mask for bit 3 of a byte. */
057        private static final int BIT_3 = 0x08;
058    
059        /** Mask for bit 4 of a byte. */
060        private static final int BIT_4 = 0x10;
061    
062        /** Mask for bit 5 of a byte. */
063        private static final int BIT_5 = 0x20;
064    
065        /** Mask for bit 6 of a byte. */
066        private static final int BIT_6 = 0x40;
067    
068        /** Mask for bit 7 of a byte. */
069        private static final int BIT_7 = 0x80;
070    
071        private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
072    
073        /**
074         * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
075         *
076         * @param raw
077         *                  the raw binary data to convert
078         * @return 0 and 1 ASCII character bytes one for each bit of the argument
079         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
080         */
081        @Override
082        public byte[] encode(byte[] raw) {
083            return toAsciiBytes(raw);
084        }
085    
086        /**
087         * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
088         *
089         * @param raw
090         *                  the raw binary data to convert
091         * @return 0 and 1 ASCII character chars one for each bit of the argument
092         * @throws EncoderException
093         *                  if the argument is not a byte[]
094         * @see org.apache.commons.codec.Encoder#encode(Object)
095         */
096        @Override
097        public Object encode(Object raw) throws EncoderException {
098            if (!(raw instanceof byte[])) {
099                throw new EncoderException("argument not a byte array");
100            }
101            return toAsciiChars((byte[]) raw);
102        }
103    
104        /**
105         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
106         *
107         * @param ascii
108         *                  each byte represents an ASCII '0' or '1'
109         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
110         * @throws DecoderException
111         *                  if argument is not a byte[], char[] or String
112         * @see org.apache.commons.codec.Decoder#decode(Object)
113         */
114        @Override
115        public Object decode(Object ascii) throws DecoderException {
116            if (ascii == null) {
117                return EMPTY_BYTE_ARRAY;
118            }
119            if (ascii instanceof byte[]) {
120                return fromAscii((byte[]) ascii);
121            }
122            if (ascii instanceof char[]) {
123                return fromAscii((char[]) ascii);
124            }
125            if (ascii instanceof String) {
126                return fromAscii(((String) ascii).toCharArray());
127            }
128            throw new DecoderException("argument not a byte array");
129        }
130    
131        /**
132         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
133         *
134         * @param ascii
135         *                  each byte represents an ASCII '0' or '1'
136         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
137         * @see org.apache.commons.codec.Decoder#decode(Object)
138         */
139        @Override
140        public byte[] decode(byte[] ascii) {
141            return fromAscii(ascii);
142        }
143    
144        /**
145         * Decodes a String where each char of the String represents an ASCII '0' or '1'.
146         *
147         * @param ascii
148         *                  String of '0' and '1' characters
149         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
150         * @see org.apache.commons.codec.Decoder#decode(Object)
151         */
152        public byte[] toByteArray(String ascii) {
153            if (ascii == null) {
154                return EMPTY_BYTE_ARRAY;
155            }
156            return fromAscii(ascii.toCharArray());
157        }
158    
159        // ------------------------------------------------------------------------
160        //
161        // static codec operations
162        //
163        // ------------------------------------------------------------------------
164        /**
165         * Decodes a char array where each char represents an ASCII '0' or '1'.
166         *
167         * @param ascii
168         *                  each char represents an ASCII '0' or '1'
169         * @return the raw encoded binary where each bit corresponds to a char in the char array argument
170         */
171        public static byte[] fromAscii(char[] ascii) {
172            if (ascii == null || ascii.length == 0) {
173                return EMPTY_BYTE_ARRAY;
174            }
175            // get length/8 times bytes with 3 bit shifts to the right of the length
176            byte[] l_raw = new byte[ascii.length >> 3];
177            /*
178             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
179             * loop.
180             */
181            for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
182                for (int bits = 0; bits < BITS.length; ++bits) {
183                    if (ascii[jj - bits] == '1') {
184                        l_raw[ii] |= BITS[bits];
185                    }
186                }
187            }
188            return l_raw;
189        }
190    
191        /**
192         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
193         *
194         * @param ascii
195         *                  each byte represents an ASCII '0' or '1'
196         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
197         */
198        public static byte[] fromAscii(byte[] ascii) {
199            if (isEmpty(ascii)) {
200                return EMPTY_BYTE_ARRAY;
201            }
202            // get length/8 times bytes with 3 bit shifts to the right of the length
203            byte[] l_raw = new byte[ascii.length >> 3];
204            /*
205             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
206             * loop.
207             */
208            for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
209                for (int bits = 0; bits < BITS.length; ++bits) {
210                    if (ascii[jj - bits] == '1') {
211                        l_raw[ii] |= BITS[bits];
212                    }
213                }
214            }
215            return l_raw;
216        }
217    
218        /**
219         * Returns {@code true} if the given array is {@code null} or empty (size 0.)
220         *
221         * @param array
222         *            the source array
223         * @return {@code true} if the given array is {@code null} or empty (size 0.)
224         */
225        private static boolean isEmpty(byte[] array) {
226            return array == null || array.length == 0;
227        }
228    
229        /**
230         * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
231         * char.
232         *
233         * @param raw
234         *                  the raw binary data to convert
235         * @return an array of 0 and 1 character bytes for each bit of the argument
236         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
237         */
238        public static byte[] toAsciiBytes(byte[] raw) {
239            if (isEmpty(raw)) {
240                return EMPTY_BYTE_ARRAY;
241            }
242            // get 8 times the bytes with 3 bit shifts to the left of the length
243            byte[] l_ascii = new byte[raw.length << 3];
244            /*
245             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
246             * loop.
247             */
248            for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
249                for (int bits = 0; bits < BITS.length; ++bits) {
250                    if ((raw[ii] & BITS[bits]) == 0) {
251                        l_ascii[jj - bits] = '0';
252                    } else {
253                        l_ascii[jj - bits] = '1';
254                    }
255                }
256            }
257            return l_ascii;
258        }
259    
260        /**
261         * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
262         *
263         * @param raw
264         *                  the raw binary data to convert
265         * @return an array of 0 and 1 characters for each bit of the argument
266         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
267         */
268        public static char[] toAsciiChars(byte[] raw) {
269            if (isEmpty(raw)) {
270                return EMPTY_CHAR_ARRAY;
271            }
272            // get 8 times the bytes with 3 bit shifts to the left of the length
273            char[] l_ascii = new char[raw.length << 3];
274            /*
275             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
276             * loop.
277             */
278            for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
279                for (int bits = 0; bits < BITS.length; ++bits) {
280                    if ((raw[ii] & BITS[bits]) == 0) {
281                        l_ascii[jj - bits] = '0';
282                    } else {
283                        l_ascii[jj - bits] = '1';
284                    }
285                }
286            }
287            return l_ascii;
288        }
289    
290        /**
291         * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
292         *
293         * @param raw
294         *                  the raw binary data to convert
295         * @return a String of 0 and 1 characters representing the binary data
296         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
297         */
298        public static String toAsciiString(byte[] raw) {
299            return new String(toAsciiChars(raw));
300        }
301    }