Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Objects;
021
022import org.apache.commons.codec.CodecPolicy;
023
024/**
025 * Provides Base16 encoding and decoding.
026 *
027 * <p>
028 * This class is thread-safe.
029 * </p>
030 * <p>
031 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
032 * characters or whitespace, neither does it offer chunking or padding characters.
033 * </p>
034 * <p>
035 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
036 * alphabet.
037 * </p>
038 *
039 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
040 *
041 * @since 1.15
042 */
043public class Base16 extends BaseNCodec {
044
045    /**
046     * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
047     */
048    private static final int BITS_PER_ENCODED_BYTE = 4;
049    private static final int BYTES_PER_ENCODED_BLOCK = 2;
050    private static final int BYTES_PER_UNENCODED_BLOCK = 1;
051
052    /**
053     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
054     * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
055     */
056    // @formatter:off
057    private static final byte[] UPPER_CASE_DECODE_TABLE = {
058            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
059            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
060            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
061            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
062             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
063            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
064    };
065    // @formatter:on
066
067    /**
068     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
069     * 4648.
070     */
071    private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
072
073    /**
074     * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
075     * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
076     */
077    // @formatter:off
078    private static final byte[] LOWER_CASE_DECODE_TABLE = {
079            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
080            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
081            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
082            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
083             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
084            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
085            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
086            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
087    };
088    // @formatter:on
089
090    /**
091     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
092     */
093    private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
094
095    /** Mask used to extract 4 bits, used when decoding character. */
096    private static final int MASK_4BITS = 0x0f;
097
098    /**
099     * Decode table to use.
100     */
101    private final byte[] decodeTable;
102
103    /**
104     * Encode table to use.
105     */
106    private final byte[] encodeTable;
107
108    /**
109     * Constructs a Base16 codec used for decoding and encoding.
110     */
111    public Base16() {
112        this(false);
113    }
114
115    /**
116     * Constructs a Base16 codec used for decoding and encoding.
117     *
118     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
119     */
120    public Base16(final boolean lowerCase) {
121        this(lowerCase, DECODING_POLICY_DEFAULT);
122    }
123
124    /**
125     * Constructs a Base16 codec used for decoding and encoding.
126     *
127     * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
128     * @param encodeTable    the encode table.
129     * @param decodingPolicy Decoding policy.
130     */
131    private Base16(final boolean lowerCase, final byte[] encodeTable, final CodecPolicy decodingPolicy) {
132        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
133        Objects.requireNonNull(encodeTable, "encodeTable");
134        this.encodeTable = encodeTable;
135        this.decodeTable = encodeTable == LOWER_CASE_ENCODE_TABLE ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE;
136    }
137
138    /**
139     * Constructs a Base16 codec used for decoding and encoding.
140     *
141     * @param lowerCase      if {@code true} then use a lower-case Base16 alphabet.
142     * @param decodingPolicy Decoding policy.
143     */
144    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
145        this(lowerCase, lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE, decodingPolicy);
146    }
147
148    @Override
149    void decode(final byte[] data, int offset, final int length, final Context context) {
150        if (context.eof || length < 0) {
151            context.eof = true;
152            if (context.ibitWorkArea != 0) {
153                validateTrailingCharacter();
154            }
155            return;
156        }
157        final int dataLen = Math.min(data.length - offset, length);
158        final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
159        // small optimization to short-cut the rest of this method when it is fed byte-by-byte
160        if (availableChars == 1 && availableChars == dataLen) {
161            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
162            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
163            return;
164        }
165        // we must have an even number of chars to decode
166        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
167        final int end = offset + dataLen;
168        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
169        int result;
170        if (dataLen < availableChars) {
171            // we have 1/2 byte from previous invocation to decode
172            result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
173            result |= decodeOctet(data[offset++]);
174            buffer[context.pos++] = (byte) result;
175            // reset to empty-value for next invocation!
176            context.ibitWorkArea = 0;
177        }
178        final int loopEnd = end - 1;
179        while (offset < loopEnd) {
180            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
181            result |= decodeOctet(data[offset++]);
182            buffer[context.pos++] = (byte) result;
183        }
184        // we have one char of a hex-pair left over
185        if (offset < end) {
186            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
187            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
188        }
189    }
190
191    private int decodeOctet(final byte octet) {
192        int decoded = -1;
193        if ((octet & 0xff) < decodeTable.length) {
194            decoded = decodeTable[octet];
195        }
196        if (decoded == -1) {
197            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
198        }
199        return decoded;
200    }
201
202    @Override
203    void encode(final byte[] data, final int offset, final int length, final Context context) {
204        if (context.eof) {
205            return;
206        }
207        if (length < 0) {
208            context.eof = true;
209            return;
210        }
211        final int size = length * BYTES_PER_ENCODED_BLOCK;
212        if (size < 0) {
213            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
214        }
215        final byte[] buffer = ensureBufferSize(size, context);
216        final int end = offset + length;
217        for (int i = offset; i < end; i++) {
218            final int value = data[i];
219            final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4BITS;
220            final int low = value & MASK_4BITS;
221            buffer[context.pos++] = encodeTable[high];
222            buffer[context.pos++] = encodeTable[low];
223        }
224    }
225
226    /**
227     * Returns whether or not the {@code octet} is in the Base16 alphabet.
228     *
229     * @param octet The value to test.
230     *
231     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
232     */
233    @Override
234    public boolean isInAlphabet(final byte octet) {
235        return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
236    }
237
238    /**
239     * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
240     *
241     * @throws IllegalArgumentException if strict decoding is enabled
242     */
243    private void validateTrailingCharacter() {
244        if (isStrictDecoding()) {
245            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet character but not a possible encoding. " +
246                    "Decoding requires at least two characters to create one byte.");
247        }
248    }
249}