/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;

/**
 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
 *
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
 * characters or whitespace, neither does it offer chunking or padding characters.
 * </p>
 * <p>
 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
 * alphabet.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
 * @since 1.15
 */
public class Base16 extends BaseNCodec {

    /**
     * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 4;

    /** Number of encoded characters produced for each unencoded block. */
    private static final int BYTES_PER_ENCODED_BLOCK = 2;

    /** Number of raw bytes that make up one unencoded block. */
    private static final int BYTES_PER_UNENCODED_BLOCK = 1;

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
     * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] UPPER_CASE_DECODE_TABLE = {
            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
     * 4648.
     */
    private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

    /**
     * This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet" into their 4-bit positive integer
     * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] LOWER_CASE_DECODE_TABLE = {
            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
     */
    private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

    /** Mask used to extract 4 bits, used when splitting a byte into its two encoded characters. */
    private static final int MASK_4_BITS = 0x0f;

    /**
     * Decode table to use.
     */
    private final byte[] decodeTable;

    /**
     * Encode table to use.
     */
    private final byte[] encodeTable;

    /**
     * Constructs a Base16 codec used for decoding and encoding, using the upper-case alphabet.
     */
    public Base16() {
        this(false);
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding.
     *
     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
     */
    public Base16(final boolean lowerCase) {
        this(lowerCase, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding.
     *
     * <p>
     * The decode table is chosen by identity comparison: the lower-case decode table is used only when {@code encodeTable} is the very
     * {@link #LOWER_CASE_ENCODE_TABLE} instance, otherwise the upper-case decode table is used.
     * </p>
     *
     * @param encodeTable    the encode table.
     * @param decodingPolicy Decoding policy.
     * @throws NullPointerException if {@code encodeTable} is {@code null}.
     */
    private Base16(final byte[] encodeTable, final CodecPolicy decodingPolicy) {
        // NOTE(review): the two zeros presumably switch off line chunking in BaseNCodec - confirm against the superclass constructor.
        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
        Objects.requireNonNull(encodeTable, "encodeTable");
        this.encodeTable = encodeTable;
        this.decodeTable = encodeTable == LOWER_CASE_ENCODE_TABLE ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
    }

    /**
     * Constructs a Base16 codec used for decoding and encoding.
     *
     * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
     * @param decodingPolicy Decoding policy.
     */
    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
        this(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE, decodingPolicy);
    }

    /**
     * Decodes Base16 characters from {@code data} into the context buffer, two characters per output byte.
     *
     * <p>
     * A single left-over character is remembered in {@code context.ibitWorkArea} as its decoded value plus one (zero means "nothing pending") and is
     * combined with the first character of the next invocation. A negative {@code length} signals end of input; if a half byte is still pending at that
     * point, {@link #validateTrailingCharacter()} decides whether that is an error.
     * </p>
     *
     * @param data    the encoded input.
     * @param offset  index of the first character to read from {@code data}.
     * @param length  number of characters to read, or a negative value to signal end of input.
     * @param context the decoding state carried between invocations.
     * @throws IllegalArgumentException if a character outside the active alphabet is read, or if strict decoding is enabled and input ends on a half byte.
     */
    @Override
    void decode(final byte[] data, int offset, final int length, final Context context) {
        if (context.eof || length < 0) {
            context.eof = true;
            if (context.ibitWorkArea != 0) {
                validateTrailingCharacter();
            }
            return;
        }
        final int dataLen = Math.min(data.length - offset, length);
        if (dataLen <= 0) {
            // Nothing to consume. Without this guard a pending half byte would be paired with data[offset], which lies outside
            // the requested range (and may be outside the array altogether). Any pending half byte stays in the context.
            return;
        }
        final boolean halfBytePending = context.ibitWorkArea != 0;
        final int availableChars = (halfBytePending ? 1 : 0) + dataLen;
        // small optimization to short-cut the rest of this method when it is fed byte-by-byte
        if (!halfBytePending && dataLen == 1) {
            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
            return;
        }
        // we must have an even number of chars to decode
        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
        final int end = offset + dataLen;
        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
        int result;
        if (halfBytePending) {
            // we have 1/2 byte from previous invocation to decode
            result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
            result |= decodeOctet(data[offset++]);
            buffer[context.pos++] = (byte) result;
            // reset to empty-value for next invocation!
            context.ibitWorkArea = 0;
        }
        // stop one short of the end so that a complete pair is always available inside the loop
        final int loopEnd = end - 1;
        while (offset < loopEnd) {
            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
            result |= decodeOctet(data[offset++]);
            buffer[context.pos++] = (byte) result;
        }
        // we have one char of a hex-pair left over
        if (offset < end) {
            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
        }
    }

    /**
     * Translates a single encoded character into its 4-bit value using the active decode table.
     *
     * @param octet the encoded character.
     * @return the decoded value in the range 0-15.
     * @throws IllegalArgumentException if {@code octet} is not in the active alphabet.
     */
    private int decodeOctet(final byte octet) {
        int decoded = -1;
        // The unsigned comparison rejects negative bytes, so the signed index below is always within the table.
        if ((octet & 0xff) < decodeTable.length) {
            decoded = decodeTable[octet];
        }
        if (decoded == -1) {
            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
        }
        return decoded;
    }

    /**
     * Encodes bytes from {@code data} into the context buffer, writing two alphabet characters (high half first) per input byte.
     *
     * <p>
     * A negative {@code length} signals end of input and only marks the context as finished; once the context is finished, further calls do nothing.
     * </p>
     *
     * @param data    the raw input.
     * @param offset  index of the first byte to read from {@code data}.
     * @param length  number of bytes to read, or a negative value to signal end of input.
     * @param context the encoding state carried between invocations.
     * @throws IllegalArgumentException if twice {@code length} overflows an {@code int}.
     */
    @Override
    void encode(final byte[] data, final int offset, final int length, final Context context) {
        if (context.eof) {
            return;
        }
        if (length < 0) {
            context.eof = true;
            return;
        }
        final int size = length * BYTES_PER_ENCODED_BLOCK;
        // a negative product means the multiplication overflowed
        if (size < 0) {
            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
        }
        final byte[] buffer = ensureBufferSize(size, context);
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            final int value = data[i];
            // masking after the signed shift discards the sign-extension bits of negative bytes
            final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4_BITS;
            final int low = value & MASK_4_BITS;
            buffer[context.pos++] = encodeTable[high];
            buffer[context.pos++] = encodeTable[low];
        }
    }

    /**
     * Returns whether or not the {@code octet} is in the Base16 alphabet.
     *
     * @param octet The value to test.
     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
     */
    @Override
    public boolean isInAlphabet(final byte octet) {
        return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
    }

    /**
     * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
     *
     * @throws IllegalArgumentException if strict decoding is enabled
     */
    private void validateTrailingCharacter() {
        if (isStrictDecoding()) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet character but not a possible encoding. " +
                    "Decoding requires at least two characters to create one byte.");
        }
    }
}