Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Arrays;
021
022import org.apache.commons.codec.CodecPolicy;
023
024/**
025 * Provides Base32 encoding and decoding as defined by <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
026 *
027 * <p>
028 * The class can be parameterized in the following manner with various constructors:
029 * </p>
030 * <ul>
031 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
032 * <li>Line length: Default 76. Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
033 * <li>Line separator: Default is CRLF ("\r\n")</li>
034 * </ul>
035 * <p>
036 * This class operates directly on byte streams, and not character streams.
037 * </p>
038 * <p>
039 * This class is thread-safe.
040 * </p>
041 * <p>
042 * To configure a new instance, use a {@link Builder}. For example:
043 * </p>
044 * <pre>
045 * Base32 base32 = Base32.builder()
046 *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
047 *   .setLineLength(0)                          // default is none
048 *   .setLineSeparator('\r', '\n')              // default is CR LF
049 *   .setPadding('=')                           // default is '='
050 *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
051 *   .get();
052 * </pre>
053 *
054 * @see Base32InputStream
055 * @see Base32OutputStream
056 * @see <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
057 * @since 1.5
058 */
059public class Base32 extends BaseNCodec {
060
061    /**
062     * Builds {@link Base32} instances.
063     *
064     * <p>
065     * To configure a new instance, use a {@link Builder}. For example:
066     * </p>
067     * <pre>
068     * Base32 base32 = Base32.builder()
069     *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
070     *   .setLineLength(0)                          // default is none
071     *   .setLineSeparator('\r', '\n')              // default is CR LF
072     *   .setPadding('=')                           // default is '='
073     *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
074     *   .get();
075     * </pre>
076     *
077     * @since 1.17.0
078     */
079    public static class Builder extends AbstractBuilder<Base32, Builder> {
080
081        /**
082         * Constructs a new instance using <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
083         * Alphabet</a>.
084         */
085        public Builder() {
086            super(ENCODE_TABLE);
087            setDecodeTableRaw(DECODE_TABLE);
088            setEncodeTableRaw(ENCODE_TABLE);
089            setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
090            setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
091        }
092
093        @Override
094        public Base32 get() {
095            return new Base32(this);
096        }
097
098        @Override
099        public Builder setEncodeTable(final byte... encodeTable) {
100            // Pair the decode table with the alphabet: the hex decode table only for the RFC 4648 hex alphabet, otherwise the standard one.
101            super.setDecodeTableRaw(Arrays.equals(encodeTable, HEX_ENCODE_TABLE) ? HEX_DECODE_TABLE : DECODE_TABLE);
102            return super.setEncodeTable(encodeTable);
103        }
104
105        /**
106         * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. This overrides the decode table previously
107         * selected by {@link #setEncodeTable(byte...)}; the encode table is not modified.
108         *
109         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
110         * @return {@code this} instance.
111         * @since 1.18.0
112         */
113        public Builder setHexDecodeTable(final boolean useHex) {
114            // Install the decode table directly; routing it through setEncodeTable would replace the encode alphabet with decode data.
115            setDecodeTableRaw(decodeTable(useHex));
116            return this;
117        }
118
119        /**
120         * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
121         * <p>
122         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
123         * </p>
124         *
125         * @param useHex
126         *               <ul>
127         *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding
128         *               with Extended Hex Alphabet</a></li>
129         *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
130         *               Alphabet</a></li>
131         *               </ul>
132         * @return {@code this} instance.
133         * @since 1.18.0
134         */
135        public Builder setHexEncodeTable(final boolean useHex) {
136            return setEncodeTable(encodeTable(useHex));
137        }
138    }
139
140    /**
141     * Number of data bits carried by one encoded character. A block of five octets forms a 40-bit string, which is converted into eight 5-bit BASE32
142     * characters.
143     */
144    private static final int BITS_PER_ENCODED_BYTE = 5;
145
146    private static final int BYTES_PER_ENCODED_BLOCK = 8; // eight Base32 characters carry one 40-bit block
147    private static final int BYTES_PER_UNENCODED_BLOCK = 5; // five octets make up one 40-bit block
148
149    /**
150     * Lookup table translating characters of the "Base32 Alphabet" (Table 3 of RFC 4648) into their 5-bit values (0 to 31); lower-case letters map to the
151     * same values as upper-case ones. Entries for characters outside the alphabet are -1. The table ends at 'z' (0x7a), so indexes must be range-checked.
152     */
153    // @formatter:off
154    private static final byte[] DECODE_TABLE = {
155         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
156            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
157            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
158            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
159            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
160            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
161            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
162                                                        -1, -1, -1, -1, -1, // 5b-5f
163            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
164            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
165    };
166    // @formatter:on
167
168    /**
169     * This array is a lookup table that translates 5-bit index values (0 to 31) into their "Base32 Alphabet" equivalents as specified in
170     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>.
171     *
172     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>
173     */
174    // @formatter:off
175    private static final byte[] ENCODE_TABLE = {
176            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
177            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
178            '2', '3', '4', '5', '6', '7',
179    };
180    // @formatter:on
181
182    /**
183     * Lookup table translating characters of the "Base32 Hex Alphabet" (Table 4 of RFC 4648) into their 5-bit values (0 to 31); lower-case letters map to
184     * the same values as upper-case ones. Entries for characters outside the alphabet are -1. The table ends at 'v' (0x76), so indexes must be range-checked.
185     */
186    // @formatter:off
187    private static final byte[] HEX_DECODE_TABLE = {
188         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
189            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
190            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
191            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
192             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
193            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
194            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
195                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
196            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
197            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
198    };
199    // @formatter:on
200
201    /**
202     * This array is a lookup table that translates 5-bit index values (0 to 31) into their "Base 32 Encoding with Extended Hex Alphabet" equivalents as
203     * specified in <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex
204     * Alphabet</a>.
205     *
206     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex Alphabet</a>
207     */
208    // @formatter:off
209    private static final byte[] HEX_ENCODE_TABLE = {
210            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
211            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
212            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
213    };
214    // @formatter:on
215
216    /** Mask used to extract 5 bits, used when encoding Base32 bytes. */
217    private static final int MASK_5_BITS = 0x1f;
218
219    /** Mask used to extract 4 bits, used when decoding the final trailing character. */
220    private static final long MASK_4_BITS = 0x0fL;
221
222    /** Mask used to extract 3 bits, used when decoding the final trailing character. */
223    private static final long MASK_3_BITS = 0x07L;
224
225    /** Mask used to extract 2 bits, used when decoding the final trailing character. */
226    private static final long MASK_2_BITS = 0x03L;
227
228    /** Mask used to extract 1 bit, used when decoding the final trailing character. */
229    private static final long MASK_1_BITS = 0x01L;
230
231    // The static final fields above are used for the original static byte[] methods on Base32.
232    // The private member fields below are used with the new streaming approach, which requires
233    // some state be preserved between calls of encode() and decode().
234
235    /**
236     * Creates a new Builder.
237     *
238     * <p>
239     * To configure a new instance, use a {@link Builder}. For example:
240     * </p>
241     *
242     * <pre>
243     * Base32 base32 = Base32.builder()
244     *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
245     *   .setLineLength(0)                          // default is none
246     *   .setLineSeparator('\r', '\n')              // default is CR LF
247     *   .setPadding('=')                           // default is '='
248     *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
249     *   .get();
250     * </pre>
251     *
252     * @return a new Builder.
253     * @since 1.17.0
254     */
255    public static Builder builder() {
256        return new Builder();
257    }
258
259    private static byte[] decodeTable(final boolean useHex) {
260        return !useHex ? DECODE_TABLE : HEX_DECODE_TABLE; // standard Base32 decode table unless the hex variant is requested
261    }
262
263    /**
264     * Selects the encoding alphabet for the requested Base32 variant.
265     *
266     * @param useHex {@code true} selects the <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4</a> Extended Hex
267     *               alphabet; {@code false} selects the <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3</a>
268     *               Base 32 alphabet.
269     * @return the shared encode table for that variant (the array itself, not a copy).
270     */
271    private static byte[] encodeTable(final boolean useHex) {
272        if (useHex) {
273            return HEX_ENCODE_TABLE;
274        }
275        // Standard RFC 4648 Base 32 alphabet.
276        return ENCODE_TABLE;
277    }
278
279    /**
280     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. The constructor sets it to
281     * {@code BYTES_PER_ENCODED_BLOCK + lineSeparator.length} when the line length is positive, otherwise to {@code BYTES_PER_ENCODED_BLOCK}.
282     */
283    private final int encodeSize;
284
285    /**
286     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0; {@code null} otherwise.
287     */
288    private final byte[] lineSeparator;
289
290    /**
291     * Constructs a Base32 codec used for decoding and encoding, using the standard (non-hex) Base32 alphabet.
292     * <p>
293     * When encoding the line length is 0 (no chunking).
294     * </p>
295     */
296    public Base32() {
297        this(false);
298    }
299
300    /**
301     * Constructs a Base32 or Base32 Hex codec used for decoding and encoding, using the default padding byte.
302     * <p>
303     * When encoding the line length is 0 (no chunking).
304     * </p>
305     *
306     * @param useHex
307     *               <ul>
308     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
309     *               Extended Hex Alphabet</a></li>
310     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
311     *               Alphabet</a></li>
312     *               </ul>
313     * @deprecated Use {@link #builder()} and {@link Builder}.
314     */
315    @Deprecated
316    public Base32(final boolean useHex) {
317        this(0, null, useHex, PAD_DEFAULT); // line length 0 and no separator: output is not chunked
318    }
319
320    /**
321     * Constructs a Base32 or Base32 Hex codec used for decoding and encoding, using the given padding byte.
322     * <p>
323     * When encoding the line length is 0 (no chunking).
324     * </p>
325     *
326     * @param useHex
327     *               <ul>
328     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
329     *               Extended Hex Alphabet</a></li>
330     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
331     *               Alphabet</a></li>
332     *               </ul>
333     * @param padding byte used as padding byte; an {@link IllegalArgumentException} is thrown if it is in the alphabet or is whitespace.
334     * @deprecated Use {@link #builder()} and {@link Builder}.
335     */
336    @Deprecated
337    public Base32(final boolean useHex, final byte padding) {
338        this(0, null, useHex, padding); // line length 0 and no separator: output is not chunked
339    }
340
341    /**
342     * Constructs a codec from the builder; the line separator is validated and kept only when the builder's line length is positive.
343     *
344     * @param builder supplies the line length, line separator and padding, and is also handed to the super constructor.
345     */
346    private Base32(final Builder builder) {
347        super(builder);
348        final boolean chunking = builder.getLineLength() > 0;
349        final byte[] separator = chunking ? builder.getLineSeparator() : null;
350        // Must be done after initializing the tables
351        if (chunking && containsAlphabetOrPad(separator)) {
352            throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + StringUtils.newStringUtf8(separator) + "]");
353        }
354        this.encodeSize = chunking ? BYTES_PER_ENCODED_BLOCK + separator.length : BYTES_PER_ENCODED_BLOCK;
355        this.lineSeparator = separator;
356        if (isInAlphabet(builder.getPadding()) || Character.isWhitespace(builder.getPadding())) {
357            throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
358        }
359    }
360
361    /**
362     * Constructs a Base32 codec used for decoding and encoding, using the standard (non-hex) Base32 alphabet and the given padding byte.
363     * <p>
364     * When encoding the line length is 0 (no chunking).
365     * </p>
366     *
367     * @param pad byte used as padding byte; an {@link IllegalArgumentException} is thrown if it is in the alphabet or is whitespace.
368     * @deprecated Use {@link #builder()} and {@link Builder}.
369     */
370    @Deprecated
371    public Base32(final byte pad) {
372        this(false, pad);
373    }
374
375    /**
376     * Constructs a Base32 codec used for decoding and encoding, using the standard (non-hex) Base32 alphabet and the default padding byte.
377     * <p>
378     * When encoding the line length is given in the constructor and the line separator is {@code CHUNK_SEPARATOR} (CRLF).
379     * </p>
380     *
381     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
382     *                   the output will not be divided into lines (chunks). Ignored when decoding.
383     * @deprecated Use {@link #builder()} and {@link Builder}.
384     */
385    @Deprecated
386    public Base32(final int lineLength) {
387        this(lineLength, CHUNK_SEPARATOR);
388    }
389
390    /**
391     * Constructs a Base32 codec used for decoding and encoding, using the standard (non-hex) Base32 alphabet and the default padding byte.
392     * <p>
393     * When encoding the line length and line separator are given in the constructor.
394     * </p>
395     * <p>
396     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
397     * </p>
398     *
399     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
400     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
401     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
402     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters.
403     * @deprecated Use {@link #builder()} and {@link Builder}.
404     */
405    @Deprecated
406    public Base32(final int lineLength, final byte[] lineSeparator) {
407        this(lineLength, lineSeparator, false, PAD_DEFAULT);
408    }
409
410    /**
411     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding, using the default padding byte.
412     * <p>
413     * When encoding the line length and line separator are given in the constructor.
414     * </p>
415     * <p>
416     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
417     * </p>
418     *
419     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
420     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
421     * @param lineSeparator Each line of encoded data will end with this sequence of bytes. A {@code null} value is treated as an empty separator.
422     * @param useHex
423     *               <ul>
424     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
425     *               Extended Hex Alphabet</a></li>
426     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
427     *               Alphabet</a></li>
428     *               </ul>
429     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters.
430     * @deprecated Use {@link #builder()} and {@link Builder}.
431     */
432    @Deprecated
433    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
434        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
435    }
436
437    /**
438     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding, using the default decoding policy.
439     * <p>
440     * When encoding the line length and line separator are given in the constructor.
441     * </p>
442     * <p>
443     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
444     * </p>
445     *
446     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
447     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
448     * @param lineSeparator Each line of encoded data will end with this sequence of bytes. A {@code null} value is treated as an empty separator.
449     * @param useHex
450     *               <ul>
451     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
452     *               Extended Hex Alphabet</a></li>
453     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
454     *               Alphabet</a></li>
455     *               </ul>
456     * @param padding       padding byte.
457     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters, or when the padding is in the alphabet or is whitespace.
458     * @deprecated Use {@link #builder()} and {@link Builder}.
459     */
460    @Deprecated
461    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
462        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
463    }
464
465    /**
466     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding by configuring a {@link Builder} with the given arguments.
467     * <p>
468     * When encoding the line length and line separator are given in the constructor.
469     * </p>
470     * <p>
471     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
472     * </p>
473     *
474     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
475     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
476     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced with an empty byte array.
477     * @param useHex
478     *               <ul>
479     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
480     *               Extended Hex Alphabet</a></li>
481     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
482     *               Alphabet</a></li>
483     *               </ul>
484     * @param padding        padding byte.
485     * @param decodingPolicy The decoding policy.
486     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters, or when the padding is in the alphabet or is whitespace.
487     * @since 1.15
488     * @deprecated Use {@link #builder()} and {@link Builder}.
489     */
490    @Deprecated
491    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
492        // @formatter:off
493        this(builder()
494                .setLineLength(lineLength)
495                .setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY)
496                .setDecodeTable(decodeTable(useHex))
497                .setEncodeTableRaw(encodeTable(useHex))
498                .setPadding(padding)
499                .setDecodingPolicy(decodingPolicy));
500        // @formatter:on
501    }
502
503    /**
504     * <p>
505     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
506     * inAvail set to "-1" to alert decoder that EOF has been reached. EOF, like the first pad byte, flushes a partially collected block; later calls do nothing.
507     * </p>
508     * <p>
509     * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
510     * for other bytes, too. Bytes outside the decode table are skipped without error; only the trailing characters of an incomplete final block are validated.
511     * </p>
512     * <p>
513     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
514     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
515     * </p>
516     *
517     * @param input   byte[] array of ASCII data to Base32 decode.
518     * @param inPos   Position to start reading data from.
519     * @param inAvail Amount of bytes available from input for decoding.
520     * @param context the context to be used.
521     */
522    @Override
523    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
524        // package protected for access from I/O streams
525        if (context.eof) {
526            return;
527        }
528        if (inAvail < 0) {
529            context.eof = true;
530        }
531        final int decodeSize = this.encodeSize - 1;
532        for (int i = 0; i < inAvail; i++) {
533            final byte b = input[inPos++];
534            if (b == pad) {
535                // We're done.
536                context.eof = true;
537                break;
538            }
539            final byte[] buffer = ensureBufferSize(decodeSize, context);
540            if (b >= 0 && b < this.decodeTable.length) { // negative bytes and bytes past the end of the table cannot be Base32
541                final int result = this.decodeTable[b];
542                if (result >= 0) {
543                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
544                    // collect decoded bytes
545                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
546                    if (context.modulus == 0) { // we can output the 5 bytes
547                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
548                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
549                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
550                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
551                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
552                    }
553                }
554            }
555        }
556        // Two forms of EOF as far as Base32 decoder is concerned: actual
557        // EOF (-1) and first time '=' character is encountered in stream.
558        // This approach makes the '=' padding characters completely optional.
559        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
560            final byte[] buffer = ensureBufferSize(decodeSize, context);
561            // We ignore partial bytes, i.e. only multiples of 8 count.
562            // Any combination not part of a valid encoding is either partially decoded
563            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
564            // It is not possible to encode with 1, 3, 6 trailing characters.
565            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
566            // See the encode(byte[]) method EOF section.
567            switch (context.modulus) {
568//              case 0 : // impossible, as excluded above
569            case 1: // 5 bits - validate (may raise an exception), then handled by the case 2 code below
570                validateTrailingCharacters();
571                // falls-through
572            case 2: // 10 bits, drop 2 and output one byte
573                validateCharacter(MASK_2_BITS, context);
574                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
575                break;
576            case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
577                validateTrailingCharacters();
578                // Not possible from a valid encoding but decode anyway
579                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
580                break;
581            case 4: // 20 bits = 2*8 + 4
582                validateCharacter(MASK_4_BITS, context);
583                context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
584                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
585                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
586                break;
587            case 5: // 25 bits = 3*8 + 1
588                validateCharacter(MASK_1_BITS, context);
589                context.lbitWorkArea = context.lbitWorkArea >> 1; // drop 1 bit
590                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
591                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
592                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
593                break;
594            case 6: // 30 bits = 3*8 + 6, or raise an exception
595                validateTrailingCharacters();
596                // Not possible from a valid encoding but decode anyway
597                context.lbitWorkArea = context.lbitWorkArea >> 6; // drop 6 bits
598                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
599                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
600                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
601                break;
602            case 7: // 35 bits = 4*8 +3
603                validateCharacter(MASK_3_BITS, context);
604                context.lbitWorkArea = context.lbitWorkArea >> 3; // drop 3 bits
605                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
606                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
607                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
608                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
609                break;
610            default:
611                // modulus is 1-7 here (0 is excluded by the guard above) and each of those values has a case
612                throw new IllegalStateException("Impossible modulus " + context.modulus);
613            }
614        }
615    }
616
617    /**
618     * <p>
619     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
620     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
621     * </p>
622     *
623     * @param input   byte[] array of binary data to Base32 encode.
624     * @param inPos   Position to start reading data from.
625     * @param inAvail Amount of bytes available from input for encoding.
626     * @param context the context to be used.
627     */
628    @Override
629    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
630        // package protected for access from I/O streams
631        if (context.eof) {
632            return;
633        }
634        // inAvail < 0 is how we're informed of EOF in the underlying data we're
635        // encoding.
636        if (inAvail < 0) {
637            context.eof = true;
638            if (0 == context.modulus && lineLength == 0) {
639                return; // no leftovers to process and not using chunking
640            }
641            final byte[] buffer = ensureBufferSize(encodeSize, context);
642            final int savedPos = context.pos;
643            switch (context.modulus) { // % 5
644            case 0:
645                break;
646            case 1: // Only 1 octet; take top 5 bits then remainder
647                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5_BITS]; // 8-1*5 = 3
648                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5_BITS]; // 5-3=2
649                buffer[context.pos++] = pad;
650                buffer[context.pos++] = pad;
651                buffer[context.pos++] = pad;
652                buffer[context.pos++] = pad;
653                buffer[context.pos++] = pad;
654                buffer[context.pos++] = pad;
655                break;
656            case 2: // 2 octets = 16 bits to use
657                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5_BITS]; // 16-1*5 = 11
658                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5_BITS]; // 16-2*5 = 6
659                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5_BITS]; // 16-3*5 = 1
660                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5_BITS]; // 5-1 = 4
661                buffer[context.pos++] = pad;
662                buffer[context.pos++] = pad;
663                buffer[context.pos++] = pad;
664                buffer[context.pos++] = pad;
665                break;
666            case 3: // 3 octets = 24 bits to use
667                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5_BITS]; // 24-1*5 = 19
668                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5_BITS]; // 24-2*5 = 14
669                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5_BITS]; // 24-3*5 = 9
670                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5_BITS]; // 24-4*5 = 4
671                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5_BITS]; // 5-4 = 1
672                buffer[context.pos++] = pad;
673                buffer[context.pos++] = pad;
674                buffer[context.pos++] = pad;
675                break;
676            case 4: // 4 octets = 32 bits to use
677                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5_BITS]; // 32-1*5 = 27
678                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5_BITS]; // 32-2*5 = 22
679                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5_BITS]; // 32-3*5 = 17
680                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5_BITS]; // 32-4*5 = 12
681                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5_BITS]; // 32-5*5 = 7
682                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5_BITS]; // 32-6*5 = 2
683                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5_BITS]; // 5-2 = 3
684                buffer[context.pos++] = pad;
685                break;
686            default:
687                throw new IllegalStateException("Impossible modulus " + context.modulus);
688            }
689            context.currentLinePos += context.pos - savedPos; // keep track of current line position
690            // if currentPos == 0 we are at the start of a line, so don't add CRLF
691            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
692                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
693                context.pos += lineSeparator.length;
694            }
695        } else {
696            for (int i = 0; i < inAvail; i++) {
697                final byte[] buffer = ensureBufferSize(encodeSize, context);
698                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
699                int b = input[inPos++];
700                if (b < 0) {
701                    b += 256;
702                }
703                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
704                if (0 == context.modulus) { // we have enough bytes to create our output
705                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5_BITS];
706                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5_BITS];
707                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5_BITS];
708                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5_BITS];
709                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5_BITS];
710                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5_BITS];
711                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5_BITS];
712                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5_BITS];
713                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
714                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
715                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
716                        context.pos += lineSeparator.length;
717                        context.currentLinePos = 0;
718                    }
719                }
720            }
721        }
722    }
723
724    /**
725     * Gets the line separator (for testing only).
726     *
727     * @return the line separator.
728     */
729    byte[] getLineSeparator() {
730        return lineSeparator;
731    }
732
733    /**
734     * Returns whether or not the {@code octet} is in the Base32 alphabet.
735     *
736     * @param octet The value to test.
737     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
738     */
739    @Override
740    public boolean isInAlphabet(final byte octet) {
741        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
742    }
743
744    /**
745     * Validates whether decoding the final trailing character is possible in the context of the set of possible Base32 values.
746     * <p>
747     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
748     * that will be discarded.
749     * </p>
750     *
751     * @param emptyBitsMask The mask of the lower bits that should be empty.
752     * @param context       the context to be used.
753     * @throws IllegalArgumentException if the bits being checked contain any non-zero value.
754     */
755    private void validateCharacter(final long emptyBitsMask, final Context context) {
756        // Use the long bit work area
757        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
758            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
759                    "Base32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
760        }
761    }
762
763    /**
764     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
765     *
766     * @throws IllegalArgumentException if strict decoding is enabled.
767     */
768    private void validateTrailingCharacters() {
769        if (isStrictDecoding()) {
770            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
771                    "Base32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
772        }
773    }
774}