Base16.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.util.Objects;

  19. import org.apache.commons.codec.CodecPolicy;

  20. /**
  21.  * Provides Base32 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
  22.  *
  23.  * <p>
  24.  * This class is thread-safe.
  25.  * </p>
  26.  * <p>
  27.  * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
  28.  * characters or whitespace, neither does it offer chunking or padding characters.
  29.  * </p>
  30.  * <p>
  31.  * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
  32.  * alphabet.
  33.  * </p>
  34.  *
  35.  * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
  36.  * @since 1.15
  37.  */
  38. public class Base16 extends BaseNCodec {

  39.     /**
  40.      * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
  41.      */
  42.     private static final int BITS_PER_ENCODED_BYTE = 4;
  43.     private static final int BYTES_PER_ENCODED_BLOCK = 2;
  44.     private static final int BYTES_PER_UNENCODED_BLOCK = 1;

  45.     /**
  46.      * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
  47.      * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
  48.      */
  49.     // @formatter:off
  50.     private static final byte[] UPPER_CASE_DECODE_TABLE = {
  51.             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  52.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
  53.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
  54.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
  55.              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
  56.             -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
  57.     };
  58.     // @formatter:on

  59.     /**
  60.      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
  61.      * 4648.
  62.      */
  63.     private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  64.     /**
  65.      * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
  66.      * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
  67.      */
  68.     // @formatter:off
  69.     private static final byte[] LOWER_CASE_DECODE_TABLE = {
  70.             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  71.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
  72.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
  73.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
  74.              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
  75.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
  76.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
  77.             -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
  78.     };
  79.     // @formatter:on

  80.     /**
  81.      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
  82.      */
  83.     private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

  84.     /** Mask used to extract 4 bits, used when decoding character. */
  85.     private static final int MASK_4BITS = 0x0f;

  86.     /**
  87.      * Decode table to use.
  88.      */
  89.     private final byte[] decodeTable;

  90.     /**
  91.      * Encode table to use.
  92.      */
  93.     private final byte[] encodeTable;

  94.     /**
  95.      * Constructs a Base16 codec used for decoding and encoding.
  96.      */
  97.     public Base16() {
  98.         this(false);
  99.     }

  100.     /**
  101.      * Constructs a Base16 codec used for decoding and encoding.
  102.      *
  103.      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
  104.      */
  105.     public Base16(final boolean lowerCase) {
  106.         this(lowerCase, DECODING_POLICY_DEFAULT);
  107.     }

  108.     /**
  109.      * Constructs a Base16 codec used for decoding and encoding.
  110.      *
  111.      * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
  112.      * @param encodeTable    the encode table.
  113.      * @param decodingPolicy Decoding policy.
  114.      */
  115.     private Base16(final boolean lowerCase, final byte[] encodeTable, final CodecPolicy decodingPolicy) {
  116.         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
  117.         Objects.requireNonNull(encodeTable, "encodeTable");
  118.         this.encodeTable = encodeTable;
  119.         this.decodeTable = encodeTable == LOWER_CASE_ENCODE_TABLE ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
  120.     }

  121.     /**
  122.      * Constructs a Base16 codec used for decoding and encoding.
  123.      *
  124.      * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
  125.      * @param decodingPolicy Decoding policy.
  126.      */
  127.     public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
  128.         this(lowerCase, lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE, decodingPolicy);
  129.     }

  130.     @Override
  131.     void decode(final byte[] data, int offset, final int length, final Context context) {
  132.         if (context.eof || length < 0) {
  133.             context.eof = true;
  134.             if (context.ibitWorkArea != 0) {
  135.                 validateTrailingCharacter();
  136.             }
  137.             return;
  138.         }
  139.         final int dataLen = Math.min(data.length - offset, length);
  140.         final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
  141.         // small optimization to short-cut the rest of this method when it is fed byte-by-byte
  142.         if (availableChars == 1 && availableChars == dataLen) {
  143.             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
  144.             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
  145.             return;
  146.         }
  147.         // we must have an even number of chars to decode
  148.         final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
  149.         final int end = offset + dataLen;
  150.         final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
  151.         int result;
  152.         if (dataLen < availableChars) {
  153.             // we have 1/2 byte from previous invocation to decode
  154.             result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
  155.             result |= decodeOctet(data[offset++]);
  156.             buffer[context.pos++] = (byte) result;
  157.             // reset to empty-value for next invocation!
  158.             context.ibitWorkArea = 0;
  159.         }
  160.         final int loopEnd = end - 1;
  161.         while (offset < loopEnd) {
  162.             result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
  163.             result |= decodeOctet(data[offset++]);
  164.             buffer[context.pos++] = (byte) result;
  165.         }
  166.         // we have one char of a hex-pair left over
  167.         if (offset < end) {
  168.             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
  169.             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
  170.         }
  171.     }

  172.     private int decodeOctet(final byte octet) {
  173.         int decoded = -1;
  174.         if ((octet & 0xff) < decodeTable.length) {
  175.             decoded = decodeTable[octet];
  176.         }
  177.         if (decoded == -1) {
  178.             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
  179.         }
  180.         return decoded;
  181.     }

  182.     @Override
  183.     void encode(final byte[] data, final int offset, final int length, final Context context) {
  184.         if (context.eof) {
  185.             return;
  186.         }
  187.         if (length < 0) {
  188.             context.eof = true;
  189.             return;
  190.         }
  191.         final int size = length * BYTES_PER_ENCODED_BLOCK;
  192.         if (size < 0) {
  193.             throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
  194.         }
  195.         final byte[] buffer = ensureBufferSize(size, context);
  196.         final int end = offset + length;
  197.         for (int i = offset; i < end; i++) {
  198.             final int value = data[i];
  199.             final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4BITS;
  200.             final int low = value & MASK_4BITS;
  201.             buffer[context.pos++] = encodeTable[high];
  202.             buffer[context.pos++] = encodeTable[low];
  203.         }
  204.     }

  205.     /**
  206.      * Returns whether or not the {@code octet} is in the Base16 alphabet.
  207.      *
  208.      * @param octet The value to test.
  209.      * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
  210.      */
  211.     @Override
  212.     public boolean isInAlphabet(final byte octet) {
  213.         return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
  214.     }

  215.     /**
  216.      * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
  217.      *
  218.      * @throws IllegalArgumentException if strict decoding is enabled
  219.      */
  220.     private void validateTrailingCharacter() {
  221.         if (isStrictDecoding()) {
  222.             throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet character but not a possible encoding. " +
  223.                     "Decoding requires at least two characters to create one byte.");
  224.         }
  225.     }
  226. }