BinaryCodec.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import org.apache.commons.codec.BinaryDecoder;
  19. import org.apache.commons.codec.BinaryEncoder;
  20. import org.apache.commons.codec.DecoderException;
  21. import org.apache.commons.codec.EncoderException;

  22. /**
  23.  * Converts between byte arrays and strings of "0"s and "1"s.
  24.  *
  25.  * <p>This class is immutable and thread-safe.</p>
  26.  *
  27.  * TODO: may want to add more bit vector functions like and/or/xor/nand
  28.  * TODO: also might be good to generate boolean[] from byte[] et cetera.
  29.  *
  30.  * @since 1.3
  31.  */
  32. public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
  33.     /*
  34.      * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
  35.      * it.
  36.      */
  37.     /** Empty char array. */
  38.     private static final char[] EMPTY_CHAR_ARRAY = {};

  39.     /** Empty byte array. */
  40.     private static final byte[] EMPTY_BYTE_ARRAY = {};

  41.     /** Mask for bit 0 of a byte. */
  42.     private static final int BIT_0 = 1;

  43.     /** Mask for bit 1 of a byte. */
  44.     private static final int BIT_1 = 0x02;

  45.     /** Mask for bit 2 of a byte. */
  46.     private static final int BIT_2 = 0x04;

  47.     /** Mask for bit 3 of a byte. */
  48.     private static final int BIT_3 = 0x08;

  49.     /** Mask for bit 4 of a byte. */
  50.     private static final int BIT_4 = 0x10;

  51.     /** Mask for bit 5 of a byte. */
  52.     private static final int BIT_5 = 0x20;

  53.     /** Mask for bit 6 of a byte. */
  54.     private static final int BIT_6 = 0x40;

  55.     /** Mask for bit 7 of a byte. */
  56.     private static final int BIT_7 = 0x80;

  57.     private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};

  58.     /**
  59.      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
  60.      *
  61.      * @param ascii
  62.      *                  each byte represents an ASCII '0' or '1'
  63.      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
  64.      */
  65.     public static byte[] fromAscii(final byte[] ascii) {
  66.         if (isEmpty(ascii)) {
  67.             return EMPTY_BYTE_ARRAY;
  68.         }
  69.         final int asciiLength = ascii.length;
  70.         // get length/8 times bytes with 3 bit shifts to the right of the length
  71.         final byte[] raw = new byte[asciiLength >> 3];
  72.         /*
  73.          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
  74.          * loop.
  75.          */
  76.         for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
  77.             for (int bits = 0; bits < BITS.length; ++bits) {
  78.                 if (ascii[jj - bits] == '1') {
  79.                     raw[ii] |= BITS[bits];
  80.                 }
  81.             }
  82.         }
  83.         return raw;
  84.     }

  85.     /**
  86.      * Decodes a char array where each char represents an ASCII '0' or '1'.
  87.      *
  88.      * @param ascii
  89.      *                  each char represents an ASCII '0' or '1'
  90.      * @return the raw encoded binary where each bit corresponds to a char in the char array argument
  91.      */
  92.     public static byte[] fromAscii(final char[] ascii) {
  93.         if (ascii == null || ascii.length == 0) {
  94.             return EMPTY_BYTE_ARRAY;
  95.         }
  96.         final int asciiLength = ascii.length;
  97.         // get length/8 times bytes with 3 bit shifts to the right of the length
  98.         final byte[] raw = new byte[asciiLength >> 3];
  99.         /*
  100.          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
  101.          * loop.
  102.          */
  103.         for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
  104.             for (int bits = 0; bits < BITS.length; ++bits) {
  105.                 if (ascii[jj - bits] == '1') {
  106.                     raw[ii] |= BITS[bits];
  107.                 }
  108.             }
  109.         }
  110.         return raw;
  111.     }

  112.     /**
  113.      * Returns {@code true} if the given array is {@code null} or empty (size 0.)
  114.      *
  115.      * @param array
  116.      *            the source array
  117.      * @return {@code true} if the given array is {@code null} or empty (size 0.)
  118.      */
  119.     static boolean isEmpty(final byte[] array) {
  120.         return array == null || array.length == 0;
  121.     }

  122.     /**
  123.      * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
  124.      * char.
  125.      *
  126.      * @param raw
  127.      *                  the raw binary data to convert
  128.      * @return an array of 0 and 1 character bytes for each bit of the argument
  129.      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
  130.      */
  131.     public static byte[] toAsciiBytes(final byte[] raw) {
  132.         if (isEmpty(raw)) {
  133.             return EMPTY_BYTE_ARRAY;
  134.         }
  135.         final int rawLength = raw.length;
  136.         // get 8 times the bytes with 3 bit shifts to the left of the length
  137.         final byte[] l_ascii = new byte[rawLength << 3];
  138.         /*
  139.          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
  140.          * loop.
  141.          */
  142.         for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
  143.             for (int bits = 0; bits < BITS.length; ++bits) {
  144.                 if ((raw[ii] & BITS[bits]) == 0) {
  145.                     l_ascii[jj - bits] = '0';
  146.                 } else {
  147.                     l_ascii[jj - bits] = '1';
  148.                 }
  149.             }
  150.         }
  151.         return l_ascii;
  152.     }

  153.     /**
  154.      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
  155.      *
  156.      * @param raw
  157.      *                  the raw binary data to convert
  158.      * @return an array of 0 and 1 characters for each bit of the argument
  159.      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
  160.      */
  161.     public static char[] toAsciiChars(final byte[] raw) {
  162.         if (isEmpty(raw)) {
  163.             return EMPTY_CHAR_ARRAY;
  164.         }
  165.         final int rawLength = raw.length;
  166.         // get 8 times the bytes with 3 bit shifts to the left of the length
  167.         final char[] l_ascii = new char[rawLength << 3];
  168.         /*
  169.          * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
  170.          * loop.
  171.          */
  172.         for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
  173.             for (int bits = 0; bits < BITS.length; ++bits) {
  174.                 if ((raw[ii] & BITS[bits]) == 0) {
  175.                     l_ascii[jj - bits] = '0';
  176.                 } else {
  177.                     l_ascii[jj - bits] = '1';
  178.                 }
  179.             }
  180.         }
  181.         return l_ascii;
  182.     }

  183.     /**
  184.      * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
  185.      *
  186.      * @param raw
  187.      *                  the raw binary data to convert
  188.      * @return a String of 0 and 1 characters representing the binary data
  189.      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
  190.      */
  191.     public static String toAsciiString(final byte[] raw) {
  192.         return new String(toAsciiChars(raw));
  193.     }

  194.     /**
  195.      * Constructs a new instance.
  196.      */
  197.     public BinaryCodec() {
  198.         // empty
  199.     }

  200.     /**
  201.      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
  202.      *
  203.      * @param ascii
  204.      *                  each byte represents an ASCII '0' or '1'
  205.      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
  206.      * @see org.apache.commons.codec.Decoder#decode(Object)
  207.      */
  208.     @Override
  209.     public byte[] decode(final byte[] ascii) {
  210.         return fromAscii(ascii);
  211.     }

  212.     /**
  213.      * Decodes a byte array where each byte represents an ASCII '0' or '1'.
  214.      *
  215.      * @param ascii
  216.      *                  each byte represents an ASCII '0' or '1'
  217.      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
  218.      * @throws DecoderException
  219.      *                  if argument is not a byte[], char[] or String
  220.      * @see org.apache.commons.codec.Decoder#decode(Object)
  221.      */
  222.     @Override
  223.     public Object decode(final Object ascii) throws DecoderException {
  224.         if (ascii == null) {
  225.             return EMPTY_BYTE_ARRAY;
  226.         }
  227.         if (ascii instanceof byte[]) {
  228.             return fromAscii((byte[]) ascii);
  229.         }
  230.         if (ascii instanceof char[]) {
  231.             return fromAscii((char[]) ascii);
  232.         }
  233.         if (ascii instanceof String) {
  234.             return fromAscii(((String) ascii).toCharArray());
  235.         }
  236.         throw new DecoderException("argument not a byte array");
  237.     }

  238.     /**
  239.      * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
  240.      *
  241.      * @param raw
  242.      *                  the raw binary data to convert
  243.      * @return 0 and 1 ASCII character bytes one for each bit of the argument
  244.      * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
  245.      */
  246.     @Override
  247.     public byte[] encode(final byte[] raw) {
  248.         return toAsciiBytes(raw);
  249.     }

  250.     /**
  251.      * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
  252.      *
  253.      * @param raw
  254.      *                  the raw binary data to convert
  255.      * @return 0 and 1 ASCII character chars one for each bit of the argument
  256.      * @throws EncoderException
  257.      *                  if the argument is not a byte[]
  258.      * @see org.apache.commons.codec.Encoder#encode(Object)
  259.      */
  260.     @Override
  261.     public Object encode(final Object raw) throws EncoderException {
  262.         if (!(raw instanceof byte[])) {
  263.             throw new EncoderException("argument not a byte array");
  264.         }
  265.         return toAsciiChars((byte[]) raw);
  266.     }

  267.     /**
  268.      * Decodes a String where each char of the String represents an ASCII '0' or '1'.
  269.      *
  270.      * @param ascii
  271.      *                  String of '0' and '1' characters
  272.      * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
  273.      * @see org.apache.commons.codec.Decoder#decode(Object)
  274.      */
  275.     public byte[] toByteArray(final String ascii) {
  276.         if (ascii == null) {
  277.             return EMPTY_BYTE_ARRAY;
  278.         }
  279.         return fromAscii(ascii.toCharArray());
  280.     }
  281. }