001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Arrays;
021
022import org.apache.commons.codec.CodecPolicy;
023
024/**
025 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
026 *
027 * <p>
028 * This class is thread-safe.
029 * </p>
030 * <p>
031 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
032 * characters or whitespace, neither does it offer chunking or padding characters.
033 * </p>
034 * <p>
035 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
036 * alphabet.
037 * </p>
038 *
039 * @see Base16InputStream
040 * @see Base16OutputStream
041 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
042 * @since 1.15
043 */
044public class Base16 extends BaseNCodec {
045
046    /**
047     * Builds {@link Base16} instances.
048     *
049     * <p>
050     * To configure a new instance, use a {@link Builder}. For example:
051     * </p>
052     *
053     * <pre>
054     * Base16 Base16 = Base16.builder()
055     *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient
056     *   .get()
057     * </pre>
058     *
059     * @since 1.20.0
060     */
061    public static class Builder extends AbstractBuilder<Base16, Builder> {
062
063        /**
064         * Constructs a new instance.
065         */
066        public Builder() {
067            super(null);
068            setDecodeTable(UPPER_CASE_DECODE_TABLE);
069            setEncodeTable(UPPER_CASE_ENCODE_TABLE);
070            setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
071            setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
072            setLineLength(0);
073            setLineSeparator(EMPTY_BYTE_ARRAY);
074        }
075
076        @Override
077        public Base16 get() {
078            return new Base16(this);
079        }
080
081        @Override
082        public Builder setEncodeTable(final byte... encodeTable) {
083            super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
084            return super.setEncodeTable(encodeTable);
085        }
086
087        /**
088         * Sets whether to use the lower-case Base16 alphabet.
089         *
090         * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
091         * @return {@code this} instance.
092         */
093        public Builder setLowerCase(final boolean lowerCase) {
094            setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
095            return asThis();
096        }
097
098    }
099
100    /**
101     * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
102     */
103    private static final int BITS_PER_ENCODED_BYTE = 4;
104
105    private static final int BYTES_PER_ENCODED_BLOCK = 2;
106
107    private static final int BYTES_PER_UNENCODED_BLOCK = 1;
108
109    /**
110     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
111     * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
112     */
113    // @formatter:off
114    private static final byte[] UPPER_CASE_DECODE_TABLE = {
115            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
116            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
117            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
118            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
119             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
120            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
121    };
122    // @formatter:on
123
124    /**
125     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
126     * 4648.
127     */
128    private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
129
130    /**
131     * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
132     * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
133     */
134    // @formatter:off
135    private static final byte[] LOWER_CASE_DECODE_TABLE = {
136            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
137            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
138            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
139            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
140             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
141            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
142            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
143            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
144    };
145    // @formatter:on
146
147    /**
148     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
149     */
150    private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
151
152    /** Mask used to extract 4 bits, used when decoding character. */
153    private static final int MASK_4_BITS = 0x0f;
154
155    /**
156     * Constructs a new builder.
157     *
158     * @return a new builder.
159     * @since 1.20.0
160     */
161    public static Builder builder() {
162        return new Builder();
163    }
164
165    /**
166     * Constructs a Base16 codec used for decoding and encoding.
167     */
168    public Base16() {
169        this(false);
170    }
171
172    /**
173     * Constructs a Base16 codec used for decoding and encoding.
174     *
175     * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
176     * @deprecated Use {@link #builder()} and {@link Builder}.
177     */
178    @Deprecated
179    public Base16(final boolean lowerCase) {
180        this(lowerCase, DECODING_POLICY_DEFAULT);
181    }
182
183    /**
184     * Constructs a Base16 codec used for decoding and encoding.
185     *
186     * @param lowerCase      {@code true} to use the lower-case Base16 alphabet.
187     * @param decodingPolicy Decoding policy.
188     * @deprecated Use {@link #builder()} and {@link Builder}.
189     */
190    @Deprecated
191    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
192        this(builder().setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE).setDecodingPolicy(decodingPolicy));
193    }
194
195    private Base16(final Builder builder) {
196        super(builder);
197    }
198
199    @Override
200    void decode(final byte[] data, int offset, final int length, final Context context) {
201        if (context.eof || length < 0) {
202            context.eof = true;
203            if (context.ibitWorkArea != 0) {
204                validateTrailingCharacter();
205            }
206            return;
207        }
208        final int dataLen = Math.min(data.length - offset, length);
209        final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
210        // small optimization to short-cut the rest of this method when it is fed byte-by-byte
211        if (availableChars == 1 && availableChars == dataLen) {
212            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
213            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
214            return;
215        }
216        // we must have an even number of chars to decode
217        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
218        final int end = offset + dataLen;
219        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
220        int result;
221        if (dataLen < availableChars) {
222            // we have 1/2 byte from previous invocation to decode
223            result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
224            result |= decodeOctet(data[offset++]);
225            buffer[context.pos++] = (byte) result;
226            // reset to empty-value for next invocation!
227            context.ibitWorkArea = 0;
228        }
229        final int loopEnd = end - 1;
230        while (offset < loopEnd) {
231            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
232            result |= decodeOctet(data[offset++]);
233            buffer[context.pos++] = (byte) result;
234        }
235        // we have one char of a hex-pair left over
236        if (offset < end) {
237            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
238            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
239        }
240    }
241
242    private int decodeOctet(final byte octet) {
243        int decoded = -1;
244        if ((octet & 0xff) < decodeTable.length) {
245            decoded = decodeTable[octet];
246        }
247        if (decoded == -1) {
248            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
249        }
250        return decoded;
251    }
252
253    @Override
254    void encode(final byte[] data, final int offset, final int length, final Context context) {
255        if (context.eof) {
256            return;
257        }
258        if (length < 0) {
259            context.eof = true;
260            return;
261        }
262        final int size = length * BYTES_PER_ENCODED_BLOCK;
263        if (size < 0) {
264            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
265        }
266        final byte[] buffer = ensureBufferSize(size, context);
267        final int end = offset + length;
268        for (int i = offset; i < end; i++) {
269            final int value = data[i];
270            final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS;
271            final int low = value & MASK_4_BITS;
272            buffer[context.pos++] = encodeTable[high];
273            buffer[context.pos++] = encodeTable[low];
274        }
275    }
276
277    /**
278     * Returns whether or not the {@code octet} is in the Base16 alphabet.
279     *
280     * @param octet The value to test.
281     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
282     */
283    @Override
284    public boolean isInAlphabet(final byte octet) {
285        return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
286    }
287
288    /**
289     * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
290     *
291     * @throws IllegalArgumentException if strict decoding is enabled.
292     */
293    private void validateTrailingCharacter() {
294        if (isStrictDecoding()) {
295            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid Base 16 alphabet character but not a possible encoding. " +
296                    "Decoding requires at least two characters to create one byte.");
297        }
298    }
299}