001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.util.Arrays; 021 022import org.apache.commons.codec.CodecPolicy; 023 024/** 025 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>. 026 * 027 * <p> 028 * This class is thread-safe. 029 * </p> 030 * <p> 031 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet 032 * characters or whitespace, neither does it offer chunking or padding characters. 033 * </p> 034 * <p> 035 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case 036 * alphabet. 037 * </p> 038 * 039 * @see Base16InputStream 040 * @see Base16OutputStream 041 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> 042 * @since 1.15 043 */ 044public class Base16 extends BaseNCodec { 045 046 /** 047 * Builds {@link Base16} instances. 048 * 049 * <p> 050 * To configure a new instance, use a {@link Builder}. For example: 051 * </p> 052 * 053 * <pre> 054 * Base16 Base16 = Base16.builder() 055 * .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient 056 * .get() 057 * </pre> 058 * 059 * @since 1.20.0 060 */ 061 public static class Builder extends AbstractBuilder<Base16, Builder> { 062 063 /** 064 * Constructs a new instance. 065 */ 066 public Builder() { 067 super(null); 068 setDecodeTable(UPPER_CASE_DECODE_TABLE); 069 setEncodeTable(UPPER_CASE_ENCODE_TABLE); 070 setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK); 071 setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK); 072 setLineLength(0); 073 setLineSeparator(EMPTY_BYTE_ARRAY); 074 } 075 076 @Override 077 public Base16 get() { 078 return new Base16(this); 079 } 080 081 @Override 082 public Builder setEncodeTable(final byte... encodeTable) { 083 super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE); 084 return super.setEncodeTable(encodeTable); 085 } 086 087 /** 088 * Sets whether to use the lower-case Base16 alphabet. 089 * 090 * @param lowerCase {@code true} to use the lower-case Base16 alphabet. 091 * @return {@code this} instance. 092 */ 093 public Builder setLowerCase(final boolean lowerCase) { 094 setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE); 095 return asThis(); 096 } 097 098 } 099 100 /** 101 * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters. 102 */ 103 private static final int BITS_PER_ENCODED_BYTE = 4; 104 105 private static final int BYTES_PER_ENCODED_BLOCK = 2; 106 107 private static final int BYTES_PER_UNENCODED_BLOCK = 1; 108 109 /** 110 * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit 111 * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1. 112 */ 113 // @formatter:off 114 private static final byte[] UPPER_CASE_DECODE_TABLE = { 115 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 119 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 120 -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F 121 }; 122 // @formatter:on 123 124 /** 125 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC 126 * 4648. 127 */ 128 private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; 129 130 /** 131 * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer 132 * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1. 133 */ 134 // @formatter:off 135 private static final byte[] LOWER_CASE_DECODE_TABLE = { 136 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 137 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 138 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 139 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 140 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 141 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f 142 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f 143 -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f 144 }; 145 // @formatter:on 146 147 /** 148 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents. 149 */ 150 private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; 151 152 /** Mask used to extract 4 bits, used when decoding character. */ 153 private static final int MASK_4_BITS = 0x0f; 154 155 /** 156 * Constructs a new builder. 157 * 158 * @return a new builder. 159 * @since 1.20.0 160 */ 161 public static Builder builder() { 162 return new Builder(); 163 } 164 165 /** 166 * Constructs a Base16 codec used for decoding and encoding. 167 */ 168 public Base16() { 169 this(false); 170 } 171 172 /** 173 * Constructs a Base16 codec used for decoding and encoding. 174 * 175 * @param lowerCase {@code true} to use the lower-case Base16 alphabet. 176 * @deprecated Use {@link #builder()} and {@link Builder}. 177 */ 178 @Deprecated 179 public Base16(final boolean lowerCase) { 180 this(lowerCase, DECODING_POLICY_DEFAULT); 181 } 182 183 /** 184 * Constructs a Base16 codec used for decoding and encoding. 185 * 186 * @param lowerCase {@code true} to use the lower-case Base16 alphabet. 187 * @param decodingPolicy Decoding policy. 188 * @deprecated Use {@link #builder()} and {@link Builder}. 189 */ 190 @Deprecated 191 public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { 192 this(builder().setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE).setDecodingPolicy(decodingPolicy)); 193 } 194 195 private Base16(final Builder builder) { 196 super(builder); 197 } 198 199 @Override 200 void decode(final byte[] data, int offset, final int length, final Context context) { 201 if (context.eof || length < 0) { 202 context.eof = true; 203 if (context.ibitWorkArea != 0) { 204 validateTrailingCharacter(); 205 } 206 return; 207 } 208 final int dataLen = Math.min(data.length - offset, length); 209 final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen; 210 // small optimization to short-cut the rest of this method when it is fed byte-by-byte 211 if (availableChars == 1 && availableChars == dataLen) { 212 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 213 context.ibitWorkArea = decodeOctet(data[offset]) + 1; 214 return; 215 } 216 // we must have an even number of chars to decode 217 final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1; 218 final int end = offset + dataLen; 219 final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context); 220 int result; 221 if (dataLen < availableChars) { 222 // we have 1/2 byte from previous invocation to decode 223 result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE; 224 result |= decodeOctet(data[offset++]); 225 buffer[context.pos++] = (byte) result; 226 // reset to empty-value for next invocation! 227 context.ibitWorkArea = 0; 228 } 229 final int loopEnd = end - 1; 230 while (offset < loopEnd) { 231 result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE; 232 result |= decodeOctet(data[offset++]); 233 buffer[context.pos++] = (byte) result; 234 } 235 // we have one char of a hex-pair left over 236 if (offset < end) { 237 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 238 context.ibitWorkArea = decodeOctet(data[offset]) + 1; 239 } 240 } 241 242 private int decodeOctet(final byte octet) { 243 int decoded = -1; 244 if ((octet & 0xff) < decodeTable.length) { 245 decoded = decodeTable[octet]; 246 } 247 if (decoded == -1) { 248 throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet); 249 } 250 return decoded; 251 } 252 253 @Override 254 void encode(final byte[] data, final int offset, final int length, final Context context) { 255 if (context.eof) { 256 return; 257 } 258 if (length < 0) { 259 context.eof = true; 260 return; 261 } 262 final int size = length * BYTES_PER_ENCODED_BLOCK; 263 if (size < 0) { 264 throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length); 265 } 266 final byte[] buffer = ensureBufferSize(size, context); 267 final int end = offset + length; 268 for (int i = offset; i < end; i++) { 269 final int value = data[i]; 270 final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS; 271 final int low = value & MASK_4_BITS; 272 buffer[context.pos++] = encodeTable[high]; 273 buffer[context.pos++] = encodeTable[low]; 274 } 275 } 276 277 /** 278 * Returns whether or not the {@code octet} is in the Base16 alphabet. 279 * 280 * @param octet The value to test. 281 * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise. 282 */ 283 @Override 284 public boolean isInAlphabet(final byte octet) { 285 return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1; 286 } 287 288 /** 289 * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte. 290 * 291 * @throws IllegalArgumentException if strict decoding is enabled. 292 */ 293 private void validateTrailingCharacter() { 294 if (isStrictDecoding()) { 295 throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid Base 16 alphabet character but not a possible encoding. " + 296 "Decoding requires at least two characters to create one byte."); 297 } 298 } 299}