001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>
029 * This class is immutable and thread-safe.
030 * </p>
031 *
032 * TODO: may want to add more bit vector functions like and/or/xor/nand TODO: also might be good to generate boolean[] from byte[] et cetera.
033 *
034 * @since 1.3
035 */
036public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
037
038    /*
039     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth it.
040     */
041
042    /** Empty char array. */
043    private static final char[] EMPTY_CHAR_ARRAY = {};
044
045    /** Empty byte array. */
046    private static final byte[] EMPTY_BYTE_ARRAY = {};
047
048    /** Mask for bit 0 of a byte. */
049    private static final int BIT_0 = 1;
050
051    /** Mask for bit 1 of a byte. */
052    private static final int BIT_1 = 0x02;
053
054    /** Mask for bit 2 of a byte. */
055    private static final int BIT_2 = 0x04;
056
057    /** Mask for bit 3 of a byte. */
058    private static final int BIT_3 = 0x08;
059
060    /** Mask for bit 4 of a byte. */
061    private static final int BIT_4 = 0x10;
062
063    /** Mask for bit 5 of a byte. */
064    private static final int BIT_5 = 0x20;
065
066    /** Mask for bit 6 of a byte. */
067    private static final int BIT_6 = 0x40;
068
069    /** Mask for bit 7 of a byte. */
070    private static final int BIT_7 = 0x80;
071
072    private static final int[] BITS = { BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7 };
073
074    /**
075     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
076     *
077     * @param ascii each byte represents an ASCII '0' or '1'.
078     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
079     */
080    public static byte[] fromAscii(final byte[] ascii) {
081        if (isEmpty(ascii)) {
082            return EMPTY_BYTE_ARRAY;
083        }
084        final int asciiLength = ascii.length;
085        // get length/8 times bytes with 3 bit shifts to the right of the length
086        final byte[] raw = new byte[asciiLength >> 3];
087        /*
088         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
089         */
090        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
091            for (int bits = 0; bits < BITS.length; ++bits) {
092                if (ascii[jj - bits] == '1') {
093                    raw[ii] |= BITS[bits];
094                }
095            }
096        }
097        return raw;
098    }
099
100    /**
101     * Decodes a char array where each char represents an ASCII '0' or '1'.
102     *
103     * @param ascii each char represents an ASCII '0' or '1'.
104     * @return the raw encoded binary where each bit corresponds to a char in the char array argument.
105     */
106    public static byte[] fromAscii(final char[] ascii) {
107        if (ascii == null || ascii.length == 0) {
108            return EMPTY_BYTE_ARRAY;
109        }
110        final int asciiLength = ascii.length;
111        // get length/8 times bytes with 3 bit shifts to the right of the length
112        final byte[] raw = new byte[asciiLength >> 3];
113        /*
114         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
115         */
116        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
117            for (int bits = 0; bits < BITS.length; ++bits) {
118                if (ascii[jj - bits] == '1') {
119                    raw[ii] |= BITS[bits];
120                }
121            }
122        }
123        return raw;
124    }
125
126    /**
127     * Returns {@code true} if the given array is {@code null} or empty (size 0.)
128     *
129     * @param array the source array.
130     * @return {@code true} if the given array is {@code null} or empty (size 0.)
131     */
132    static boolean isEmpty(final byte[] array) {
133        return array == null || array.length == 0;
134    }
135
136    /**
137     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated char.
138     *
139     * @param raw the raw binary data to convert.
140     * @return an array of 0 and 1 character bytes for each bit of the argument.
141     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
142     */
143    public static byte[] toAsciiBytes(final byte[] raw) {
144        if (isEmpty(raw)) {
145            return EMPTY_BYTE_ARRAY;
146        }
147        final int rawLength = raw.length;
148        // get 8 times the bytes with 3 bit shifts to the left of the length
149        final byte[] ascii = new byte[rawLength << 3];
150        /*
151         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
152         */
153        for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
154            for (int bits = 0; bits < BITS.length; ++bits) {
155                if ((raw[ii] & BITS[bits]) == 0) {
156                    ascii[jj - bits] = '0';
157                } else {
158                    ascii[jj - bits] = '1';
159                }
160            }
161        }
162        return ascii;
163    }
164
165    /**
166     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
167     *
168     * @param raw the raw binary data to convert.
169     * @return an array of 0 and 1 characters for each bit of the argument.
170     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
171     */
172    public static char[] toAsciiChars(final byte[] raw) {
173        if (isEmpty(raw)) {
174            return EMPTY_CHAR_ARRAY;
175        }
176        final int rawLength = raw.length;
177        // get 8 times the bytes with 3 bit shifts to the left of the length
178        final char[] ascii = new char[rawLength << 3];
179        /*
180         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the loop.
181         */
182        for (int ii = 0, jj = ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
183            for (int bits = 0; bits < BITS.length; ++bits) {
184                if ((raw[ii] & BITS[bits]) == 0) {
185                    ascii[jj - bits] = '0';
186                } else {
187                    ascii[jj - bits] = '1';
188                }
189            }
190        }
191        return ascii;
192    }
193
194    /**
195     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
196     *
197     * @param raw the raw binary data to convert.
198     * @return a String of 0 and 1 characters representing the binary data.
199     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
200     */
201    public static String toAsciiString(final byte[] raw) {
202        return new String(toAsciiChars(raw));
203    }
204
205    /**
206     * Constructs a new instance.
207     */
208    public BinaryCodec() {
209        // empty
210    }
211
212    /**
213     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
214     *
215     * @param ascii each byte represents an ASCII '0' or '1'.
216     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
217     * @see org.apache.commons.codec.Decoder#decode(Object)
218     */
219    @Override
220    public byte[] decode(final byte[] ascii) {
221        return fromAscii(ascii);
222    }
223
224    /**
225     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
226     *
227     * @param ascii each byte represents an ASCII '0' or '1'.
228     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
229     * @throws DecoderException if argument is not a byte[], char[] or String.
230     * @see org.apache.commons.codec.Decoder#decode(Object)
231     */
232    @Override
233    public Object decode(final Object ascii) throws DecoderException {
234        if (ascii == null) {
235            return EMPTY_BYTE_ARRAY;
236        }
237        if (ascii instanceof byte[]) {
238            return fromAscii((byte[]) ascii);
239        }
240        if (ascii instanceof char[]) {
241            return fromAscii((char[]) ascii);
242        }
243        if (ascii instanceof String) {
244            return fromAscii(((String) ascii).toCharArray());
245        }
246        throw new DecoderException("argument not a byte array");
247    }
248
249    /**
250     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
251     *
252     * @param raw the raw binary data to convert.
253     * @return 0 and 1 ASCII character bytes one for each bit of the argument.
254     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
255     */
256    @Override
257    public byte[] encode(final byte[] raw) {
258        return toAsciiBytes(raw);
259    }
260
261    /**
262     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
263     *
264     * @param raw the raw binary data to convert.
265     * @return 0 and 1 ASCII character chars one for each bit of the argument.
266     * @throws EncoderException if the argument is not a byte[].
267     * @see org.apache.commons.codec.Encoder#encode(Object)
268     */
269    @Override
270    public Object encode(final Object raw) throws EncoderException {
271        if (!(raw instanceof byte[])) {
272            throw new EncoderException("argument not a byte array");
273        }
274        return toAsciiChars((byte[]) raw);
275    }
276
277    /**
278     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
279     *
280     * @param ascii String of '0' and '1' characters.
281     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument.
282     * @see org.apache.commons.codec.Decoder#decode(Object)
283     */
284    public byte[] toByteArray(final String ascii) {
285        if (ascii == null) {
286            return EMPTY_BYTE_ARRAY;
287        }
288        return fromAscii(ascii.toCharArray());
289    }
290}