View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.util.Objects;
21  
22  import org.apache.commons.codec.CodecPolicy;
23  
24  /**
25   * Provides Base16 encoding and decoding.
26   *
27   * <p>
28   * This class is thread-safe.
29   * </p>
30   * <p>
31   * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
32   * characters or whitespace, neither does it offer chunking or padding characters.
33   * </p>
34   * <p>
35   * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36   * alphabet.
37   * </p>
38   *
39   * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
40   *
41   * @since 1.15
42   */
43  public class Base16 extends BaseNCodec {
44  
45      /**
46       * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
47       */
48      private static final int BITS_PER_ENCODED_BYTE = 4;
49      private static final int BYTES_PER_ENCODED_BLOCK = 2;
50      private static final int BYTES_PER_UNENCODED_BLOCK = 1;
51  
52      /**
53       * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
54       * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
55       */
56      // @formatter:off
57      private static final byte[] UPPER_CASE_DECODE_TABLE = {
58              //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
59              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
60              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
61              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
62               0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
63              -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
64      };
65      // @formatter:on
66  
67      /**
68       * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
69       * 4648.
70       */
71      private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
72  
73      /**
74       * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
75       * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
76       */
77      // @formatter:off
78      private static final byte[] LOWER_CASE_DECODE_TABLE = {
79              //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
80              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
81              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
82              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
83               0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
84              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
85              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
86              -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
87      };
88      // @formatter:on
89  
90      /**
91       * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
92       */
93      private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
94  
95      /** Mask used to extract 4 bits, used when decoding character. */
96      private static final int MASK_4BITS = 0x0f;
97  
98      /**
99       * Decode table to use.
100      */
101     private final byte[] decodeTable;
102 
103     /**
104      * Encode table to use.
105      */
106     private final byte[] encodeTable;
107 
108     /**
109      * Constructs a Base16 codec used for decoding and encoding.
110      */
111     public Base16() {
112         this(false);
113     }
114 
115     /**
116      * Constructs a Base16 codec used for decoding and encoding.
117      *
118      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
119      */
120     public Base16(final boolean lowerCase) {
121         this(lowerCase, DECODING_POLICY_DEFAULT);
122     }
123 
124     /**
125      * Constructs a Base16 codec used for decoding and encoding.
126      *
127      * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
128      * @param encodeTable    the encode table.
129      * @param decodingPolicy Decoding policy.
130      */
131     private Base16(final boolean lowerCase, final byte[] encodeTable, final CodecPolicy decodingPolicy) {
132         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
133         Objects.requireNonNull(encodeTable, "encodeTable");
134         this.encodeTable = encodeTable;
135         this.decodeTable = encodeTable == LOWER_CASE_ENCODE_TABLE ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
136     }
137 
138     /**
139      * Constructs a Base16 codec used for decoding and encoding.
140      *
141      * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
142      * @param decodingPolicy Decoding policy.
143      */
144     public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
145         this(lowerCase, lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE, decodingPolicy);
146     }
147 
148     @Override
149     void decode(final byte[] data, int offset, final int length, final Context context) {
150         if (context.eof || length < 0) {
151             context.eof = true;
152             if (context.ibitWorkArea != 0) {
153                 validateTrailingCharacter();
154             }
155             return;
156         }
157         final int dataLen = Math.min(data.length - offset, length);
158         final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
159         // small optimization to short-cut the rest of this method when it is fed byte-by-byte
160         if (availableChars == 1 && availableChars == dataLen) {
161             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
162             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
163             return;
164         }
165         // we must have an even number of chars to decode
166         final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
167         final int end = offset + dataLen;
168         final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
169         int result;
170         if (dataLen < availableChars) {
171             // we have 1/2 byte from previous invocation to decode
172             result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
173             result |= decodeOctet(data[offset++]);
174             buffer[context.pos++] = (byte) result;
175             // reset to empty-value for next invocation!
176             context.ibitWorkArea = 0;
177         }
178         final int loopEnd = end - 1;
179         while (offset < loopEnd) {
180             result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
181             result |= decodeOctet(data[offset++]);
182             buffer[context.pos++] = (byte) result;
183         }
184         // we have one char of a hex-pair left over
185         if (offset < end) {
186             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
187             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
188         }
189     }
190 
191     private int decodeOctet(final byte octet) {
192         int decoded = -1;
193         if ((octet & 0xff) < decodeTable.length) {
194             decoded = decodeTable[octet];
195         }
196         if (decoded == -1) {
197             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
198         }
199         return decoded;
200     }
201 
202     @Override
203     void encode(final byte[] data, final int offset, final int length, final Context context) {
204         if (context.eof) {
205             return;
206         }
207         if (length < 0) {
208             context.eof = true;
209             return;
210         }
211         final int size = length * BYTES_PER_ENCODED_BLOCK;
212         if (size < 0) {
213             throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
214         }
215         final byte[] buffer = ensureBufferSize(size, context);
216         final int end = offset + length;
217         for (int i = offset; i < end; i++) {
218             final int value = data[i];
219             final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4BITS;
220             final int low = value & MASK_4BITS;
221             buffer[context.pos++] = encodeTable[high];
222             buffer[context.pos++] = encodeTable[low];
223         }
224     }
225 
226     /**
227      * Returns whether or not the {@code octet} is in the Base16 alphabet.
228      *
229      * @param octet The value to test.
230      *
231      * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
232      */
233     @Override
234     public boolean isInAlphabet(final byte octet) {
235         return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
236     }
237 
238     /**
239      * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
240      *
241      * @throws IllegalArgumentException if strict decoding is enabled
242      */
243     private void validateTrailingCharacter() {
244         if (isStrictDecoding()) {
245             throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet character but not a possible encoding. " +
246                     "Decoding requires at least two characters to create one byte.");
247         }
248     }
249 }