001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 * @version $Id: BinaryCodec.java 1619948 2014-08-22 22:53:55Z ggregory $
035 */
036public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
037    /*
038     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
039     * it.
040     */
041    /** Empty char array. */
042    private static final char[] EMPTY_CHAR_ARRAY = new char[0];
043
044    /** Empty byte array. */
045    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
046
047    /** Mask for bit 0 of a byte. */
048    private static final int BIT_0 = 1;
049
050    /** Mask for bit 1 of a byte. */
051    private static final int BIT_1 = 0x02;
052
053    /** Mask for bit 2 of a byte. */
054    private static final int BIT_2 = 0x04;
055
056    /** Mask for bit 3 of a byte. */
057    private static final int BIT_3 = 0x08;
058
059    /** Mask for bit 4 of a byte. */
060    private static final int BIT_4 = 0x10;
061
062    /** Mask for bit 5 of a byte. */
063    private static final int BIT_5 = 0x20;
064
065    /** Mask for bit 6 of a byte. */
066    private static final int BIT_6 = 0x40;
067
068    /** Mask for bit 7 of a byte. */
069    private static final int BIT_7 = 0x80;
070
071    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
072
073    /**
074     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
075     *
076     * @param raw
077     *                  the raw binary data to convert
078     * @return 0 and 1 ASCII character bytes one for each bit of the argument
079     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
080     */
081    @Override
082    public byte[] encode(final byte[] raw) {
083        return toAsciiBytes(raw);
084    }
085
086    /**
087     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
088     *
089     * @param raw
090     *                  the raw binary data to convert
091     * @return 0 and 1 ASCII character chars one for each bit of the argument
092     * @throws EncoderException
093     *                  if the argument is not a byte[]
094     * @see org.apache.commons.codec.Encoder#encode(Object)
095     */
096    @Override
097    public Object encode(final Object raw) throws EncoderException {
098        if (!(raw instanceof byte[])) {
099            throw new EncoderException("argument not a byte array");
100        }
101        return toAsciiChars((byte[]) raw);
102    }
103
104    /**
105     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
106     *
107     * @param ascii
108     *                  each byte represents an ASCII '0' or '1'
109     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
110     * @throws DecoderException
111     *                  if argument is not a byte[], char[] or String
112     * @see org.apache.commons.codec.Decoder#decode(Object)
113     */
114    @Override
115    public Object decode(final Object ascii) throws DecoderException {
116        if (ascii == null) {
117            return EMPTY_BYTE_ARRAY;
118        }
119        if (ascii instanceof byte[]) {
120            return fromAscii((byte[]) ascii);
121        }
122        if (ascii instanceof char[]) {
123            return fromAscii((char[]) ascii);
124        }
125        if (ascii instanceof String) {
126            return fromAscii(((String) ascii).toCharArray());
127        }
128        throw new DecoderException("argument not a byte array");
129    }
130
131    /**
132     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
133     *
134     * @param ascii
135     *                  each byte represents an ASCII '0' or '1'
136     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
137     * @see org.apache.commons.codec.Decoder#decode(Object)
138     */
139    @Override
140    public byte[] decode(final byte[] ascii) {
141        return fromAscii(ascii);
142    }
143
144    /**
145     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
146     *
147     * @param ascii
148     *                  String of '0' and '1' characters
149     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
150     * @see org.apache.commons.codec.Decoder#decode(Object)
151     */
152    public byte[] toByteArray(final String ascii) {
153        if (ascii == null) {
154            return EMPTY_BYTE_ARRAY;
155        }
156        return fromAscii(ascii.toCharArray());
157    }
158
159    // ------------------------------------------------------------------------
160    //
161    // static codec operations
162    //
163    // ------------------------------------------------------------------------
164    /**
165     * Decodes a char array where each char represents an ASCII '0' or '1'.
166     *
167     * @param ascii
168     *                  each char represents an ASCII '0' or '1'
169     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
170     */
171    public static byte[] fromAscii(final char[] ascii) {
172        if (ascii == null || ascii.length == 0) {
173            return EMPTY_BYTE_ARRAY;
174        }
175        // get length/8 times bytes with 3 bit shifts to the right of the length
176        final byte[] l_raw = new byte[ascii.length >> 3];
177        /*
178         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
179         * loop.
180         */
181        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
182            for (int bits = 0; bits < BITS.length; ++bits) {
183                if (ascii[jj - bits] == '1') {
184                    l_raw[ii] |= BITS[bits];
185                }
186            }
187        }
188        return l_raw;
189    }
190
191    /**
192     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
193     *
194     * @param ascii
195     *                  each byte represents an ASCII '0' or '1'
196     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
197     */
198    public static byte[] fromAscii(final byte[] ascii) {
199        if (isEmpty(ascii)) {
200            return EMPTY_BYTE_ARRAY;
201        }
202        // get length/8 times bytes with 3 bit shifts to the right of the length
203        final byte[] l_raw = new byte[ascii.length >> 3];
204        /*
205         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
206         * loop.
207         */
208        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
209            for (int bits = 0; bits < BITS.length; ++bits) {
210                if (ascii[jj - bits] == '1') {
211                    l_raw[ii] |= BITS[bits];
212                }
213            }
214        }
215        return l_raw;
216    }
217
218    /**
219     * Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
220     *
221     * @param array
222     *            the source array
223     * @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
224     */
225    private static boolean isEmpty(final byte[] array) {
226        return array == null || array.length == 0;
227    }
228
229    /**
230     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
231     * char.
232     *
233     * @param raw
234     *                  the raw binary data to convert
235     * @return an array of 0 and 1 character bytes for each bit of the argument
236     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
237     */
238    public static byte[] toAsciiBytes(final byte[] raw) {
239        if (isEmpty(raw)) {
240            return EMPTY_BYTE_ARRAY;
241        }
242        // get 8 times the bytes with 3 bit shifts to the left of the length
243        final byte[] l_ascii = new byte[raw.length << 3];
244        /*
245         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
246         * loop.
247         */
248        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
249            for (int bits = 0; bits < BITS.length; ++bits) {
250                if ((raw[ii] & BITS[bits]) == 0) {
251                    l_ascii[jj - bits] = '0';
252                } else {
253                    l_ascii[jj - bits] = '1';
254                }
255            }
256        }
257        return l_ascii;
258    }
259
260    /**
261     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
262     *
263     * @param raw
264     *                  the raw binary data to convert
265     * @return an array of 0 and 1 characters for each bit of the argument
266     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
267     */
268    public static char[] toAsciiChars(final byte[] raw) {
269        if (isEmpty(raw)) {
270            return EMPTY_CHAR_ARRAY;
271        }
272        // get 8 times the bytes with 3 bit shifts to the left of the length
273        final char[] l_ascii = new char[raw.length << 3];
274        /*
275         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
276         * loop.
277         */
278        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
279            for (int bits = 0; bits < BITS.length; ++bits) {
280                if ((raw[ii] & BITS[bits]) == 0) {
281                    l_ascii[jj - bits] = '0';
282                } else {
283                    l_ascii[jj - bits] = '1';
284                }
285            }
286        }
287        return l_ascii;
288    }
289
290    /**
291     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
292     *
293     * @param raw
294     *                  the raw binary data to convert
295     * @return a String of 0 and 1 characters representing the binary data
296     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
297     */
298    public static String toAsciiString(final byte[] raw) {
299        return new String(toAsciiChars(raw));
300    }
301}