/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;

import org.apache.commons.codec.CodecPolicy;

/**
 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
 *
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
 * characters or whitespace, neither does it offer chunking or padding characters.
 * </p>
 * <p>
 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
 * alphabet.
 * </p>
 *
 * @see Base16InputStream
 * @see Base16OutputStream
 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
 * @since 1.15
 */
public class Base16 extends BaseNCodec {

    /**
     * Builds {@link Base16} instances.
     *
     * <p>
     * To configure a new instance, use a {@link Builder}. For example:
     * </p>
     *
     * <pre>
     * Base16 base16 = Base16.builder()
     *   .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient
     *   .get();
     * </pre>
     *
     * @since 1.20.0
     */
    public static class Builder extends AbstractBuilder<Base16, Builder> {

        /**
         * Constructs a new instance configured for the upper-case alphabet, one unencoded byte per two encoded bytes, and no line splitting.
         */
        public Builder() {
            super(null);
            setDecodeTable(UPPER_CASE_DECODE_TABLE);
            setEncodeTable(UPPER_CASE_ENCODE_TABLE);
            setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
            setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
            setLineLength(0);
            setLineSeparator(EMPTY_BYTE_ARRAY);
        }

        /**
         * Builds a new {@link Base16} from the current state of this builder.
         *
         * @return a new {@link Base16}.
         */
        @Override
        public Base16 get() {
            return new Base16(this);
        }

        /**
         * Sets the encode table and selects the matching decode table: the lower-case decode table when {@code encodeTable} equals the lower-case encode
         * table, the upper-case decode table otherwise.
         *
         * @param encodeTable the encode table.
         * @return {@code this} instance.
         */
        @Override
        public Builder setEncodeTable(final byte... encodeTable) {
            super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
            return super.setEncodeTable(encodeTable);
        }

        /**
         * Sets whether to use the lower-case Base16 alphabet, for both encoding and decoding.
         *
         * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
         * @return {@code this} instance.
         */
        public Builder setLowerCase(final boolean lowerCase) {
            setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
            // Keep the decode table paired with the encode table; otherwise a lower-case codec could not decode its own output.
            setDecodeTableRaw(lowerCase ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
            return asThis();
        }

    }

    /**
     * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 4;

    /** Number of encoded characters that represent one unencoded byte. */
    private static final int BYTES_PER_ENCODED_BLOCK = 2;

    /** Number of unencoded bytes represented by one encoded block. */
    private static final int BYTES_PER_UNENCODED_BLOCK = 1;

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
     * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] UPPER_CASE_DECODE_TABLE = {
            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
     * 4648.
     */
    private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

    /**
     * This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet" into their 4-bit positive integer
     * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] LOWER_CASE_DECODE_TABLE = {
            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
     */
    private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /** Mask used to extract 4 bits, used when decoding character. */
    private static final int MASK_4_BITS = 0x0f;

    /**
     * Constructs a new builder.
     *
     * @return a new builder.
     * @since 1.20.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding, using the upper-case alphabet.
     */
    public Base16() {
        this(false);
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding.
     *
     * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base16(final boolean lowerCase) {
        this(lowerCase, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding.
     *
     * @param lowerCase      {@code true} to use the lower-case Base16 alphabet.
     * @param decodingPolicy Decoding policy.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
        this(builder().setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE).setDecodingPolicy(decodingPolicy));
    }

    /**
     * Constructs a Base16 codec from the given builder.
     *
     * @param builder the configured builder.
     */
    private Base16(final Builder builder) {
        super(builder);
    }

    /**
     * Decodes Base16 characters from {@code data} into the context's buffer, two characters per output byte.
     *
     * <p>
     * When the available input holds an odd number of characters, the decoded value of the last character is kept in {@code context.ibitWorkArea} (stored
     * plus one, so that zero means "nothing pending") and is combined with the first character of the next invocation. A negative {@code length} signals the
     * end of the input; a pending character at that point is checked by {@link #validateTrailingCharacter()}.
     * </p>
     *
     * @param data    the bytes to decode.
     * @param offset  the position in {@code data} to start reading from.
     * @param length  the number of bytes to read, or a negative value to signal the end of input.
     * @param context the context holding the output buffer and the pending half byte.
     * @throws IllegalArgumentException if a character is not in the alphabet, or if the input ends with a dangling character and strict decoding is enabled.
     */
    @Override
    void decode(final byte[] data, int offset, final int length, final Context context) {
        if (context.eof || length < 0) {
            context.eof = true;
            if (context.ibitWorkArea != 0) {
                validateTrailingCharacter();
            }
            return;
        }
        final int dataLen = Math.min(data.length - offset, length);
        if (dataLen <= 0) {
            // Nothing to consume. Returning here keeps a pending half byte untouched instead of pairing it with data[offset],
            // which is outside the requested range (and out of bounds for an empty array).
            return;
        }
        final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
        // small optimization to short-cut the rest of this method when it is fed byte-by-byte
        if (availableChars == 1 && availableChars == dataLen) {
            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
            return;
        }
        // we must have an even number of chars to decode
        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
        final int end = offset + dataLen;
        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
        int result;
        if (dataLen < availableChars) {
            // we have 1/2 byte from previous invocation to decode
            result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
            result |= decodeOctet(data[offset++]);
            buffer[context.pos++] = (byte) result;
            // reset to empty-value for next invocation!
            context.ibitWorkArea = 0;
        }
        // stop one short of end so that a full pair is always available inside the loop
        final int loopEnd = end - 1;
        while (offset < loopEnd) {
            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
            result |= decodeOctet(data[offset++]);
            buffer[context.pos++] = (byte) result;
        }
        // we have one char of a hex-pair left over
        if (offset < end) {
            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
        }
    }

    /**
     * Translates one encoded character into its 4-bit value using the active decode table.
     *
     * @param octet the encoded character.
     * @return the decoded 4-bit value.
     * @throws IllegalArgumentException if {@code octet} lies outside the decode table or maps to -1.
     */
    private int decodeOctet(final byte octet) {
        // Unsigned view of the byte: negative bytes become 128-255 and fall outside the table.
        final int index = octet & 0xff;
        int decoded = -1;
        if (index < decodeTable.length) {
            decoded = decodeTable[index];
        }
        if (decoded == -1) {
            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
        }
        return decoded;
    }

    /**
     * Encodes bytes from {@code data} into the context's buffer, writing two alphabet characters (high 4 bits first) per input byte.
     *
     * @param data    the bytes to encode.
     * @param offset  the position in {@code data} to start reading from.
     * @param length  the number of bytes to read, or a negative value to signal the end of input.
     * @param context the context holding the output buffer.
     * @throws IllegalArgumentException if the encoded size for {@code length} overflows an {@code int}.
     */
    @Override
    void encode(final byte[] data, final int offset, final int length, final Context context) {
        if (context.eof) {
            return;
        }
        if (length < 0) {
            context.eof = true;
            return;
        }
        final int size = length * BYTES_PER_ENCODED_BLOCK;
        // a negative product means the multiplication overflowed
        if (size < 0) {
            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
        }
        final byte[] buffer = ensureBufferSize(size, context);
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            final int value = data[i];
            final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS;
            final int low = value & MASK_4_BITS;
            buffer[context.pos++] = encodeTable[high];
            buffer[context.pos++] = encodeTable[low];
        }
    }

    /**
     * Returns whether or not the {@code octet} is in the Base16 alphabet.
     *
     * @param octet The value to test.
     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
     */
    @Override
    public boolean isInAlphabet(final byte octet) {
        final int index = octet & 0xff;
        return index < decodeTable.length && decodeTable[index] != -1;
    }

    /**
     * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
     *
     * @throws IllegalArgumentException if strict decoding is enabled
     */
    private void validateTrailingCharacter() {
        if (isStrictDecoding()) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid Base 16 alphabet character but not a possible encoding. " +
                    "Decoding requires at least two characters to create one byte.");
        }
    }
}