001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Objects;
021
022import org.apache.commons.codec.CodecPolicy;
023
024/**
025 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
026 *
027 * <p>
028 * The class can be parameterized in the following manner with various constructors:
029 * </p>
030 * <ul>
031 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
 * <li>Line length: the constructors that do not take a line length use 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end
 * up being multiples of 8 in the encoded data.</li>
033 * <li>Line separator: Default is CRLF ("\r\n")</li>
034 * </ul>
035 * <p>
036 * This class operates directly on byte streams, and not character streams.
037 * </p>
038 * <p>
039 * This class is thread-safe.
040 * </p>
041 * <p>
042 * You can configure instances with the {@link Builder}.
043 * </p>
044 * <pre>
045 * Base32 base32 = Base32.builder()
 *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
047 *   .setEncodeTable(customEncodeTable)
048 *   .setLineLength(0)                          // default is none
049 *   .setLineSeparator('\r', '\n')              // default is CR LF
050 *   .setPadding('=')                           // default is =
 *   .get();
052 * </pre>
053 *
054 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
055 * @since 1.5
056 */
057public class Base32 extends BaseNCodec {
058
059    /**
060     * Builds {@link Base32} instances.
061     *
062     * @since 1.17.0
063     */
064    public static class Builder extends AbstractBuilder<Base32, Builder> {
065
066        /**
067         * Constructs a new instance.
068         */
069        public Builder() {
070            super(ENCODE_TABLE);
071        }
072
073        @Override
074        public Base32 get() {
075            return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
076        }
077
078        /**
079         * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
080         * <p>
081         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
082         * </p>
083         *
084         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
085         * @return this instance.
086         * @since 1.18.0
087         */
088        public Builder setHexDecodeTable(final boolean useHex) {
089            return setEncodeTable(decodeTable(useHex));
090        }
091
092        /**
093         * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
094         * <p>
095         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
096         * </p>
097         *
098         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
099         * @return this instance.
100         * @since 1.18.0
101         */
102        public Builder setHexEncodeTable(final boolean useHex) {
103            return setEncodeTable(encodeTable(useHex));
104        }
105    }
106
107    /**
108     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
109     * characters.
110     */
111    private static final int BITS_PER_ENCODED_BYTE = 5;
112
113    private static final int BYTES_PER_ENCODED_BLOCK = 8;
114    private static final int BYTES_PER_UNENCODED_BLOCK = 5;
115    /**
116     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
117     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
118     */
119    // @formatter:off
120    private static final byte[] DECODE_TABLE = {
121         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
122            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
123            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
124            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
125            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
126            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
127            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
128                                                        -1, -1, -1, -1, -1, // 5b-5f
129            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
130            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
131    };
132    // @formatter:on
133
134    /**
135     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
136     * 4648.
137     */
138    // @formatter:off
139    private static final byte[] ENCODE_TABLE = {
140            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
141            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
142            '2', '3', '4', '5', '6', '7',
143    };
144    // @formatter:on
145
146    /**
147     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
148     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
149     */
150    // @formatter:off
151    private static final byte[] HEX_DECODE_TABLE = {
152         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
153            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
154            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
155            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
156             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
157            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
158            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
159                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
160            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
161            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
162    };
163    // @formatter:on
164
165    /**
166     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
167     * RFC 4648.
168     */
169    // @formatter:off
170    private static final byte[] HEX_ENCODE_TABLE = {
171            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
172            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
173            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
174    };
175    // @formatter:on
176
177    /** Mask used to extract 5 bits, used when encoding Base32 bytes */
178    private static final int MASK_5_BITS = 0x1f;
179
180    /** Mask used to extract 4 bits, used when decoding final trailing character. */
181    private static final long MASK_4_BITS = 0x0fL;
182
183    /** Mask used to extract 3 bits, used when decoding final trailing character. */
184    private static final long MASK_3_BITS = 0x07L;
185
186    /** Mask used to extract 2 bits, used when decoding final trailing character. */
187    private static final long MASK_2_BITS = 0x03L;
188
189    /** Mask used to extract 1 bits, used when decoding final trailing character. */
190    private static final long MASK_1_BITS = 0x01L;
191
192    // The static final fields above are used for the original static byte[] methods on Base32.
193    // The private member fields below are used with the new streaming approach, which requires
194    // some state be preserved between calls of encode() and decode().
195
196    /**
197     * Creates a new Builder.
198     *
199     * @return a new Builder.
200     * @since 1.17.0
201     */
202    public static Builder builder() {
203        return new Builder();
204    }
205
206    private static byte[] decodeTable(final boolean useHex) {
207        return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
208    }
209
210    private static byte[] encodeTable(final boolean useHex) {
211        return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
212    }
213
214    /**
215     * Decode table to use.
216     */
217    private final byte[] decodeTable;
218
219    /**
220     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
221     * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
222     */
223    private final int encodeSize;
224
225    /**
226     * Encode table to use.
227     */
228    private final byte[] encodeTable;
229
230    /**
231     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
232     */
233    private final byte[] lineSeparator;
234
235    /**
236     * Constructs a Base32 codec used for decoding and encoding.
237     * <p>
238     * When encoding the line length is 0 (no chunking).
239     * </p>
240     */
241    public Base32() {
242        this(false);
243    }
244
245    /**
246     * Constructs a Base32 codec used for decoding and encoding.
247     * <p>
248     * When encoding the line length is 0 (no chunking).
249     * </p>
250     *
251     * @param useHex if {@code true} then use Base32 Hex alphabet
252     */
253    public Base32(final boolean useHex) {
254        this(0, null, useHex, PAD_DEFAULT);
255    }
256
257    /**
258     * Constructs a Base32 codec used for decoding and encoding.
259     * <p>
260     * When encoding the line length is 0 (no chunking).
261     * </p>
262     *
263     * @param useHex  if {@code true} then use Base32 Hex alphabet
264     * @param padding byte used as padding byte.
265     */
266    public Base32(final boolean useHex, final byte padding) {
267        this(0, null, useHex, padding);
268    }
269
270    /**
271     * Constructs a Base32 codec used for decoding and encoding.
272     * <p>
273     * When encoding the line length is 0 (no chunking).
274     * </p>
275     *
276     * @param pad byte used as padding byte.
277     */
278    public Base32(final byte pad) {
279        this(false, pad);
280    }
281
282    /**
283     * Constructs a Base32 codec used for decoding and encoding.
284     * <p>
285     * When encoding the line length is given in the constructor, the line separator is CRLF.
286     * </p>
287     *
288     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
289     *                   the output will not be divided into lines (chunks). Ignored when decoding.
290     */
291    public Base32(final int lineLength) {
292        this(lineLength, CHUNK_SEPARATOR);
293    }
294
295    /**
296     * Constructs a Base32 codec used for decoding and encoding.
297     * <p>
298     * When encoding the line length and line separator are given in the constructor.
299     * </p>
300     * <p>
301     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
302     * </p>
303     *
304     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
305     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
306     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
307     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
308     */
309    public Base32(final int lineLength, final byte[] lineSeparator) {
310        this(lineLength, lineSeparator, false, PAD_DEFAULT);
311    }
312
313    /**
314     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
315     * <p>
316     * When encoding the line length and line separator are given in the constructor.
317     * </p>
318     * <p>
319     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
320     * </p>
321     *
322     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
323     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
324     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
325     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
326     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
327     */
328    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
329        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
330    }
331
332    /**
333     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
334     * <p>
335     * When encoding the line length and line separator are given in the constructor.
336     * </p>
337     * <p>
338     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
339     * </p>
340     *
341     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
342     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
343     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
344     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
345     * @param padding       padding byte.
346     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
347     */
348    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
349        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
350    }
351
352    /**
353     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
354     * <p>
355     * When encoding the line length and line separator are given in the constructor.
356     * </p>
357     * <p>
358     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
359     * </p>
360     *
361     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
362     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
363     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
364     * @param useHex         use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
365     * @param padding        padding byte.
366     * @param decodingPolicy The decoding policy.
367     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
368     * @since 1.15
369     */
370    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
371        this(lineLength, lineSeparator, encodeTable(useHex), padding, decodingPolicy);
372    }
373
374    /**
375     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
376     * <p>
377     * When encoding the line length and line separator are given in the constructor.
378     * </p>
379     * <p>
380     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
381     * </p>
382     *
383     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
384     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
385     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
386     * @param encodeTable    A Base32 alphabet.
387     * @param padding        padding byte.
388     * @param decodingPolicy The decoding policy.
389     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
390     */
391    private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
392        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
393        Objects.requireNonNull(encodeTable, "encodeTable");
394        this.encodeTable = encodeTable;
395        this.decodeTable = encodeTable == HEX_ENCODE_TABLE ? HEX_DECODE_TABLE : DECODE_TABLE;
396        if (lineLength > 0) {
397            if (lineSeparator == null) {
398                throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
399            }
400            final byte[] lineSeparatorCopy = lineSeparator.clone();
401            // Must be done after initializing the tables
402            if (containsAlphabetOrPad(lineSeparatorCopy)) {
403                final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
404                throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
405            }
406            this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
407            this.lineSeparator = lineSeparatorCopy;
408        } else {
409            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
410            this.lineSeparator = null;
411        }
412        if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
413            throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
414        }
415    }
416
417    /**
418     * <p>
419     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
420     * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
421     * </p>
422     * <p>
423     * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
424     * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
425     * </p>
426     * <p>
427     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
428     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
429     * </p>
430     *
431     * @param input   byte[] array of ASCII data to Base32 decode.
432     * @param inPos   Position to start reading data from.
433     * @param inAvail Amount of bytes available from input for decoding.
434     * @param context the context to be used
435     */
436    @Override
437    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
438        // package protected for access from I/O streams
439        if (context.eof) {
440            return;
441        }
442        if (inAvail < 0) {
443            context.eof = true;
444        }
445        final int decodeSize = this.encodeSize - 1;
446        for (int i = 0; i < inAvail; i++) {
447            final byte b = input[inPos++];
448            if (b == pad) {
449                // We're done.
450                context.eof = true;
451                break;
452            }
453            final byte[] buffer = ensureBufferSize(decodeSize, context);
454            if (b >= 0 && b < this.decodeTable.length) {
455                final int result = this.decodeTable[b];
456                if (result >= 0) {
457                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
458                    // collect decoded bytes
459                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
460                    if (context.modulus == 0) { // we can output the 5 bytes
461                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
462                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
463                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
464                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
465                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
466                    }
467                }
468            }
469        }
470        // Two forms of EOF as far as Base32 decoder is concerned: actual
471        // EOF (-1) and first time '=' character is encountered in stream.
472        // This approach makes the '=' padding characters completely optional.
473        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
474            final byte[] buffer = ensureBufferSize(decodeSize, context);
475            // We ignore partial bytes, i.e. only multiples of 8 count.
476            // Any combination not part of a valid encoding is either partially decoded
477            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
478            // It is not possible to encode with 1, 3, 6 trailing characters.
479            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
480            // See the encode(byte[]) method EOF section.
481            switch (context.modulus) {
482//              case 0 : // impossible, as excluded above
483            case 1: // 5 bits - either ignore entirely, or raise an exception
484                validateTrailingCharacters();
485                // falls-through
486            case 2: // 10 bits, drop 2 and output one byte
487                validateCharacter(MASK_2_BITS, context);
488                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
489                break;
490            case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
491                validateTrailingCharacters();
492                // Not possible from a valid encoding but decode anyway
493                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
494                break;
495            case 4: // 20 bits = 2*8 + 4
496                validateCharacter(MASK_4_BITS, context);
497                context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
498                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
499                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
500                break;
501            case 5: // 25 bits = 3*8 + 1
502                validateCharacter(MASK_1_BITS, context);
503                context.lbitWorkArea = context.lbitWorkArea >> 1;
504                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
505                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
506                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
507                break;
508            case 6: // 30 bits = 3*8 + 6, or raise an exception
509                validateTrailingCharacters();
510                // Not possible from a valid encoding but decode anyway
511                context.lbitWorkArea = context.lbitWorkArea >> 6;
512                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
513                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
514                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
515                break;
516            case 7: // 35 bits = 4*8 +3
517                validateCharacter(MASK_3_BITS, context);
518                context.lbitWorkArea = context.lbitWorkArea >> 3;
519                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
520                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
521                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
522                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
523                break;
524            default:
525                // modulus can be 0-7, and we excluded 0,1 already
526                throw new IllegalStateException("Impossible modulus " + context.modulus);
527            }
528        }
529    }
530
531    /**
532     * <p>
533     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
534     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
535     * </p>
536     *
537     * @param input   byte[] array of binary data to Base32 encode.
538     * @param inPos   Position to start reading data from.
539     * @param inAvail Amount of bytes available from input for encoding.
540     * @param context the context to be used
541     */
542    @Override
543    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
544        // package protected for access from I/O streams
545        if (context.eof) {
546            return;
547        }
548        // inAvail < 0 is how we're informed of EOF in the underlying data we're
549        // encoding.
550        if (inAvail < 0) {
551            context.eof = true;
552            if (0 == context.modulus && lineLength == 0) {
553                return; // no leftovers to process and not using chunking
554            }
555            final byte[] buffer = ensureBufferSize(encodeSize, context);
556            final int savedPos = context.pos;
557            switch (context.modulus) { // % 5
558            case 0:
559                break;
560            case 1: // Only 1 octet; take top 5 bits then remainder
561                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5_BITS]; // 8-1*5 = 3
562                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5_BITS]; // 5-3=2
563                buffer[context.pos++] = pad;
564                buffer[context.pos++] = pad;
565                buffer[context.pos++] = pad;
566                buffer[context.pos++] = pad;
567                buffer[context.pos++] = pad;
568                buffer[context.pos++] = pad;
569                break;
570            case 2: // 2 octets = 16 bits to use
571                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5_BITS]; // 16-1*5 = 11
572                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5_BITS]; // 16-2*5 = 6
573                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5_BITS]; // 16-3*5 = 1
574                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5_BITS]; // 5-1 = 4
575                buffer[context.pos++] = pad;
576                buffer[context.pos++] = pad;
577                buffer[context.pos++] = pad;
578                buffer[context.pos++] = pad;
579                break;
580            case 3: // 3 octets = 24 bits to use
581                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5_BITS]; // 24-1*5 = 19
582                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5_BITS]; // 24-2*5 = 14
583                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5_BITS]; // 24-3*5 = 9
584                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5_BITS]; // 24-4*5 = 4
585                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5_BITS]; // 5-4 = 1
586                buffer[context.pos++] = pad;
587                buffer[context.pos++] = pad;
588                buffer[context.pos++] = pad;
589                break;
590            case 4: // 4 octets = 32 bits to use
591                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5_BITS]; // 32-1*5 = 27
592                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5_BITS]; // 32-2*5 = 22
593                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5_BITS]; // 32-3*5 = 17
594                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5_BITS]; // 32-4*5 = 12
595                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5_BITS]; // 32-5*5 = 7
596                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5_BITS]; // 32-6*5 = 2
597                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5_BITS]; // 5-2 = 3
598                buffer[context.pos++] = pad;
599                break;
600            default:
601                throw new IllegalStateException("Impossible modulus " + context.modulus);
602            }
603            context.currentLinePos += context.pos - savedPos; // keep track of current line position
604            // if currentPos == 0 we are at the start of a line, so don't add CRLF
605            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
606                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
607                context.pos += lineSeparator.length;
608            }
609        } else {
610            for (int i = 0; i < inAvail; i++) {
611                final byte[] buffer = ensureBufferSize(encodeSize, context);
612                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
613                int b = input[inPos++];
614                if (b < 0) {
615                    b += 256;
616                }
617                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
618                if (0 == context.modulus) { // we have enough bytes to create our output
619                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5_BITS];
620                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5_BITS];
621                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5_BITS];
622                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5_BITS];
623                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5_BITS];
624                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5_BITS];
625                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5_BITS];
626                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5_BITS];
627                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
628                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
629                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
630                        context.pos += lineSeparator.length;
631                        context.currentLinePos = 0;
632                    }
633                }
634            }
635        }
636    }
637
638    /**
639     * Gets the line separator (for testing only).
640     *
641     * @return the line separator.
642     */
643    byte[] getLineSeparator() {
644        return lineSeparator;
645    }
646
647    /**
648     * Returns whether or not the {@code octet} is in the Base32 alphabet.
649     *
650     * @param octet The value to test
651     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
652     */
653    @Override
654    public boolean isInAlphabet(final byte octet) {
655        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
656    }
657
658    /**
659     * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
660     * <p>
661     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
662     * that will be discarded.
663     * </p>
664     *
665     * @param emptyBitsMask The mask of the lower bits that should be empty
666     * @param context       the context to be used
667     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
668     */
669    private void validateCharacter(final long emptyBitsMask, final Context context) {
670        // Use the long bit work area
671        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
672            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
673                    "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
674        }
675    }
676
677    /**
678     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
679     *
680     * @throws IllegalArgumentException if strict decoding is enabled
681     */
682    private void validateTrailingCharacters() {
683        if (isStrictDecoding()) {
684            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
685                    "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
686        }
687    }
688}