1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.util.Objects;
21
22 import org.apache.commons.codec.CodecPolicy;
23
24 /**
25 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
26 *
27 * <p>
28 * This class is thread-safe.
29 * </p>
30 * <p>
31 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
32 * characters or whitespace, neither does it offer chunking or padding characters.
33 * </p>
34 * <p>
35 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36 * alphabet.
37 * </p>
38 *
39 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
40 * @since 1.15
41 */
42 public class Base16 extends BaseNCodec {
43
44 /**
45 * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
46 */
47 private static final int BITS_PER_ENCODED_BYTE = 4;
48 private static final int BYTES_PER_ENCODED_BLOCK = 2;
49 private static final int BYTES_PER_UNENCODED_BLOCK = 1;
50
51 /**
52 * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
53 * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
54 */
55 // @formatter:off
56 private static final byte[] UPPER_CASE_DECODE_TABLE = {
57 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
58 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
59 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
60 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
61 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
62 -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
63 };
64 // @formatter:on
65
66 /**
67 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
68 * 4648.
69 */
70 private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
71
72 /**
73 * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
74 * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
75 */
76 // @formatter:off
77 private static final byte[] LOWER_CASE_DECODE_TABLE = {
78 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
79 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
81 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
82 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
83 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
85 -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
86 };
87 // @formatter:on
88
89 /**
90 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
91 */
92 private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
93
94 /** Mask used to extract 4 bits, used when decoding character. */
95 private static final int MASK_4_BITS = 0x0f;
96
97 /**
98 * Decode table to use.
99 */
100 private final byte[] decodeTable;
101
102 /**
103 * Encode table to use.
104 */
105 private final byte[] encodeTable;
106
107 /**
108 * Constructs a Base16 codec used for decoding and encoding.
109 */
110 public Base16() {
111 this(false);
112 }
113
114 /**
115 * Constructs a Base16 codec used for decoding and encoding.
116 *
117 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
118 */
119 public Base16(final boolean lowerCase) {
120 this(lowerCase, DECODING_POLICY_DEFAULT);
121 }
122
123 /**
124 * Constructs a Base16 codec used for decoding and encoding.
125 * @param encodeTable the encode table.
126 * @param decodingPolicy Decoding policy.
127 */
128 private Base16(final byte[] encodeTable, final CodecPolicy decodingPolicy) {
129 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
130 Objects.requireNonNull(encodeTable, "encodeTable");
131 this.encodeTable = encodeTable;
132 this.decodeTable = encodeTable == LOWER_CASE_ENCODE_TABLE ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
133 }
134
135 /**
136 * Constructs a Base16 codec used for decoding and encoding.
137 *
138 * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
139 * @param decodingPolicy Decoding policy.
140 */
141 public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
142 this(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE, decodingPolicy);
143 }
144
145 @Override
146 void decode(final byte[] data, int offset, final int length, final Context context) {
147 if (context.eof || length < 0) {
148 context.eof = true;
149 if (context.ibitWorkArea != 0) {
150 validateTrailingCharacter();
151 }
152 return;
153 }
154 final int dataLen = Math.min(data.length - offset, length);
155 final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
156 // small optimization to short-cut the rest of this method when it is fed byte-by-byte
157 if (availableChars == 1 && availableChars == dataLen) {
158 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
159 context.ibitWorkArea = decodeOctet(data[offset]) + 1;
160 return;
161 }
162 // we must have an even number of chars to decode
163 final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
164 final int end = offset + dataLen;
165 final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
166 int result;
167 if (dataLen < availableChars) {
168 // we have 1/2 byte from previous invocation to decode
169 result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
170 result |= decodeOctet(data[offset++]);
171 buffer[context.pos++] = (byte) result;
172 // reset to empty-value for next invocation!
173 context.ibitWorkArea = 0;
174 }
175 final int loopEnd = end - 1;
176 while (offset < loopEnd) {
177 result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
178 result |= decodeOctet(data[offset++]);
179 buffer[context.pos++] = (byte) result;
180 }
181 // we have one char of a hex-pair left over
182 if (offset < end) {
183 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
184 context.ibitWorkArea = decodeOctet(data[offset]) + 1;
185 }
186 }
187
188 private int decodeOctet(final byte octet) {
189 int decoded = -1;
190 if ((octet & 0xff) < decodeTable.length) {
191 decoded = decodeTable[octet];
192 }
193 if (decoded == -1) {
194 throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
195 }
196 return decoded;
197 }
198
199 @Override
200 void encode(final byte[] data, final int offset, final int length, final Context context) {
201 if (context.eof) {
202 return;
203 }
204 if (length < 0) {
205 context.eof = true;
206 return;
207 }
208 final int size = length * BYTES_PER_ENCODED_BLOCK;
209 if (size < 0) {
210 throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
211 }
212 final byte[] buffer = ensureBufferSize(size, context);
213 final int end = offset + length;
214 for (int i = offset; i < end; i++) {
215 final int value = data[i];
216 final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS;
217 final int low = value & MASK_4_BITS;
218 buffer[context.pos++] = encodeTable[high];
219 buffer[context.pos++] = encodeTable[low];
220 }
221 }
222
223 /**
224 * Returns whether or not the {@code octet} is in the Base16 alphabet.
225 *
226 * @param octet The value to test.
227 * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
228 */
229 @Override
230 public boolean isInAlphabet(final byte octet) {
231 return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
232 }
233
234 /**
235 * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
236 *
237 * @throws IllegalArgumentException if strict decoding is enabled
238 */
239 private void validateTrailingCharacter() {
240 if (isStrictDecoding()) {
241 throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet character but not a possible encoding. " +
242 "Decoding requires at least two characters to create one byte.");
243 }
244 }
245 }