001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 */
035public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036    /*
037     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038     * it.
039     */
040    /** Empty char array. */
041    private static final char[] EMPTY_CHAR_ARRAY = new char[0];
042
043    /** Empty byte array. */
044    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
045
046    /** Mask for bit 0 of a byte. */
047    private static final int BIT_0 = 1;
048
049    /** Mask for bit 1 of a byte. */
050    private static final int BIT_1 = 0x02;
051
052    /** Mask for bit 2 of a byte. */
053    private static final int BIT_2 = 0x04;
054
055    /** Mask for bit 3 of a byte. */
056    private static final int BIT_3 = 0x08;
057
058    /** Mask for bit 4 of a byte. */
059    private static final int BIT_4 = 0x10;
060
061    /** Mask for bit 5 of a byte. */
062    private static final int BIT_5 = 0x20;
063
064    /** Mask for bit 6 of a byte. */
065    private static final int BIT_6 = 0x40;
066
067    /** Mask for bit 7 of a byte. */
068    private static final int BIT_7 = 0x80;
069
070    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071
072    /**
073     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
074     *
075     * @param raw
076     *                  the raw binary data to convert
077     * @return 0 and 1 ASCII character bytes one for each bit of the argument
078     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
079     */
080    @Override
081    public byte[] encode(final byte[] raw) {
082        return toAsciiBytes(raw);
083    }
084
085    /**
086     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
087     *
088     * @param raw
089     *                  the raw binary data to convert
090     * @return 0 and 1 ASCII character chars one for each bit of the argument
091     * @throws EncoderException
092     *                  if the argument is not a byte[]
093     * @see org.apache.commons.codec.Encoder#encode(Object)
094     */
095    @Override
096    public Object encode(final Object raw) throws EncoderException {
097        if (!(raw instanceof byte[])) {
098            throw new EncoderException("argument not a byte array");
099        }
100        return toAsciiChars((byte[]) raw);
101    }
102
103    /**
104     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
105     *
106     * @param ascii
107     *                  each byte represents an ASCII '0' or '1'
108     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
109     * @throws DecoderException
110     *                  if argument is not a byte[], char[] or String
111     * @see org.apache.commons.codec.Decoder#decode(Object)
112     */
113    @Override
114    public Object decode(final Object ascii) throws DecoderException {
115        if (ascii == null) {
116            return EMPTY_BYTE_ARRAY;
117        }
118        if (ascii instanceof byte[]) {
119            return fromAscii((byte[]) ascii);
120        }
121        if (ascii instanceof char[]) {
122            return fromAscii((char[]) ascii);
123        }
124        if (ascii instanceof String) {
125            return fromAscii(((String) ascii).toCharArray());
126        }
127        throw new DecoderException("argument not a byte array");
128    }
129
130    /**
131     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
132     *
133     * @param ascii
134     *                  each byte represents an ASCII '0' or '1'
135     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
136     * @see org.apache.commons.codec.Decoder#decode(Object)
137     */
138    @Override
139    public byte[] decode(final byte[] ascii) {
140        return fromAscii(ascii);
141    }
142
143    /**
144     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
145     *
146     * @param ascii
147     *                  String of '0' and '1' characters
148     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
149     * @see org.apache.commons.codec.Decoder#decode(Object)
150     */
151    public byte[] toByteArray(final String ascii) {
152        if (ascii == null) {
153            return EMPTY_BYTE_ARRAY;
154        }
155        return fromAscii(ascii.toCharArray());
156    }
157
158    // ------------------------------------------------------------------------
159    //
160    // static codec operations
161    //
162    // ------------------------------------------------------------------------
163    /**
164     * Decodes a char array where each char represents an ASCII '0' or '1'.
165     *
166     * @param ascii
167     *                  each char represents an ASCII '0' or '1'
168     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
169     */
170    public static byte[] fromAscii(final char[] ascii) {
171        if (ascii == null || ascii.length == 0) {
172            return EMPTY_BYTE_ARRAY;
173        }
174        // get length/8 times bytes with 3 bit shifts to the right of the length
175        final byte[] l_raw = new byte[ascii.length >> 3];
176        /*
177         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
178         * loop.
179         */
180        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
181            for (int bits = 0; bits < BITS.length; ++bits) {
182                if (ascii[jj - bits] == '1') {
183                    l_raw[ii] |= BITS[bits];
184                }
185            }
186        }
187        return l_raw;
188    }
189
190    /**
191     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
192     *
193     * @param ascii
194     *                  each byte represents an ASCII '0' or '1'
195     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
196     */
197    public static byte[] fromAscii(final byte[] ascii) {
198        if (isEmpty(ascii)) {
199            return EMPTY_BYTE_ARRAY;
200        }
201        // get length/8 times bytes with 3 bit shifts to the right of the length
202        final byte[] l_raw = new byte[ascii.length >> 3];
203        /*
204         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
205         * loop.
206         */
207        for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
208            for (int bits = 0; bits < BITS.length; ++bits) {
209                if (ascii[jj - bits] == '1') {
210                    l_raw[ii] |= BITS[bits];
211                }
212            }
213        }
214        return l_raw;
215    }
216
217    /**
218     * Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
219     *
220     * @param array
221     *            the source array
222     * @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
223     */
224    private static boolean isEmpty(final byte[] array) {
225        return array == null || array.length == 0;
226    }
227
228    /**
229     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
230     * char.
231     *
232     * @param raw
233     *                  the raw binary data to convert
234     * @return an array of 0 and 1 character bytes for each bit of the argument
235     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
236     */
237    public static byte[] toAsciiBytes(final byte[] raw) {
238        if (isEmpty(raw)) {
239            return EMPTY_BYTE_ARRAY;
240        }
241        // get 8 times the bytes with 3 bit shifts to the left of the length
242        final byte[] l_ascii = new byte[raw.length << 3];
243        /*
244         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
245         * loop.
246         */
247        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
248            for (int bits = 0; bits < BITS.length; ++bits) {
249                if ((raw[ii] & BITS[bits]) == 0) {
250                    l_ascii[jj - bits] = '0';
251                } else {
252                    l_ascii[jj - bits] = '1';
253                }
254            }
255        }
256        return l_ascii;
257    }
258
259    /**
260     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
261     *
262     * @param raw
263     *                  the raw binary data to convert
264     * @return an array of 0 and 1 characters for each bit of the argument
265     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
266     */
267    public static char[] toAsciiChars(final byte[] raw) {
268        if (isEmpty(raw)) {
269            return EMPTY_CHAR_ARRAY;
270        }
271        // get 8 times the bytes with 3 bit shifts to the left of the length
272        final char[] l_ascii = new char[raw.length << 3];
273        /*
274         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
275         * loop.
276         */
277        for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
278            for (int bits = 0; bits < BITS.length; ++bits) {
279                if ((raw[ii] & BITS[bits]) == 0) {
280                    l_ascii[jj - bits] = '0';
281                } else {
282                    l_ascii[jj - bits] = '1';
283                }
284            }
285        }
286        return l_ascii;
287    }
288
289    /**
290     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
291     *
292     * @param raw
293     *                  the raw binary data to convert
294     * @return a String of 0 and 1 characters representing the binary data
295     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
296     */
297    public static String toAsciiString(final byte[] raw) {
298        return new String(toAsciiChars(raw));
299    }
300}