Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
import java.util.Arrays;
import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;
023
/**
 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
 *
 * <p>
 * The class can be parameterized in the following manner with various constructors:
 * </p>
 * <ul>
 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
 * <li>Line length: Default 0 (no chunking). Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded
 * data.</li>
 * <li>Line separator: Default is CRLF ("\r\n")</li>
 * </ul>
 * <p>
 * This class operates directly on byte streams, and not character streams.
 * </p>
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * You can configure instances with the {@link Builder}.
 * </p>
 * <pre>
 * Base32 base32 = Base32.builder()
 *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
 *   .setEncodeTable(customEncodeTable)
 *   .setLineLength(0)                          // default is none
 *   .setLineSeparator('\r', '\n')              // default is CR LF
 *   .setPadding('=')                           // default is =
 *   .get();
 * </pre>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
 * @since 1.5
 */
057public class Base32 extends BaseNCodec {
058
059    /**
060     * Builds {@link Base32} instances.
061     *
062     * @since 1.17.0
063     */
064    public static class Builder extends AbstractBuilder<Base32, Builder> {
065
066        /**
067         * Constructs a new instance.
068         */
069        public Builder() {
070            super(ENCODE_TABLE);
071        }
072
073        @Override
074        public Base32 get() {
075            return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
076        }
077
078    }
079
080    /**
081     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
082     * characters.
083     */
084    private static final int BITS_PER_ENCODED_BYTE = 5;
085
086    private static final int BYTES_PER_ENCODED_BLOCK = 8;
087    private static final int BYTES_PER_UNENCODED_BLOCK = 5;
088    /**
089     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
090     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
091     */
092    // @formatter:off
093    private static final byte[] DECODE_TABLE = {
094         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
095            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
096            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
097            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
098            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
099            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
100            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
101                                                        -1, -1, -1, -1, -1, // 5b-5f
102            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
103            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
104    };
105    // @formatter:on
106
107    /**
108     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
109     * 4648.
110     */
111    // @formatter:off
112    private static final byte[] ENCODE_TABLE = {
113            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
114            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
115            '2', '3', '4', '5', '6', '7',
116    };
117    // @formatter:on
118
119    /**
120     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
121     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
122     */
123    // @formatter:off
124    private static final byte[] HEX_DECODE_TABLE = {
125         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
126            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
127            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
128            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
129             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
130            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
131            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
132                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
133            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
134            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
135    };
136    // @formatter:on
137
138    /**
139     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
140     * RFC 4648.
141     */
142    // @formatter:off
143    private static final byte[] HEX_ENCODE_TABLE = {
144            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
145            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
146            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
147    };
148    // @formatter:on
149
150    /** Mask used to extract 5 bits, used when encoding Base32 bytes */
151    private static final int MASK_5BITS = 0x1f;
152
153    /** Mask used to extract 4 bits, used when decoding final trailing character. */
154    private static final long MASK_4BITS = 0x0fL;
155
156    /** Mask used to extract 3 bits, used when decoding final trailing character. */
157    private static final long MASK_3BITS = 0x07L;
158
159    /** Mask used to extract 2 bits, used when decoding final trailing character. */
160    private static final long MASK_2BITS = 0x03L;
161
162    /** Mask used to extract 1 bits, used when decoding final trailing character. */
163    private static final long MASK_1BITS = 0x01L;
164
165    /**
166     * Creates a new Builder.
167     *
168     * @return a new Builder.
169     * @since 1.17.0
170     */
171    public static Builder builder() {
172        return new Builder();
173    }
174
175    // The static final fields above are used for the original static byte[] methods on Base32.
176    // The private member fields below are used with the new streaming approach, which requires
177    // some state be preserved between calls of encode() and decode().
178
179    /**
180     * Decode table to use.
181     */
182    private final byte[] decodeTable;
183
184    /**
185     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
186     * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
187     */
188    private final int encodeSize;
189
190    /**
191     * Encode table to use.
192     */
193    private final byte[] encodeTable;
194
195    /**
196     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
197     */
198    private final byte[] lineSeparator;
199
200    /**
201     * Constructs a Base32 codec used for decoding and encoding.
202     * <p>
203     * When encoding the line length is 0 (no chunking).
204     * </p>
205     */
206    public Base32() {
207        this(false);
208    }
209
210    /**
211     * Constructs a Base32 codec used for decoding and encoding.
212     * <p>
213     * When encoding the line length is 0 (no chunking).
214     * </p>
215     *
216     * @param useHex if {@code true} then use Base32 Hex alphabet
217     */
218    public Base32(final boolean useHex) {
219        this(0, null, useHex, PAD_DEFAULT);
220    }
221
222    /**
223     * Constructs a Base32 codec used for decoding and encoding.
224     * <p>
225     * When encoding the line length is 0 (no chunking).
226     * </p>
227     *
228     * @param useHex  if {@code true} then use Base32 Hex alphabet
229     * @param padding byte used as padding byte.
230     */
231    public Base32(final boolean useHex, final byte padding) {
232        this(0, null, useHex, padding);
233    }
234
235    /**
236     * Constructs a Base32 codec used for decoding and encoding.
237     * <p>
238     * When encoding the line length is 0 (no chunking).
239     * </p>
240     *
241     * @param pad byte used as padding byte.
242     */
243    public Base32(final byte pad) {
244        this(false, pad);
245    }
246
247    /**
248     * Constructs a Base32 codec used for decoding and encoding.
249     * <p>
250     * When encoding the line length is given in the constructor, the line separator is CRLF.
251     * </p>
252     *
253     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
254     *                   the output will not be divided into lines (chunks). Ignored when decoding.
255     */
256    public Base32(final int lineLength) {
257        this(lineLength, CHUNK_SEPARATOR);
258    }
259
260    /**
261     * Constructs a Base32 codec used for decoding and encoding.
262     * <p>
263     * When encoding the line length and line separator are given in the constructor.
264     * </p>
265     * <p>
266     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
267     * </p>
268     *
269     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
270     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
271     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
272     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
273     */
274    public Base32(final int lineLength, final byte[] lineSeparator) {
275        this(lineLength, lineSeparator, false, PAD_DEFAULT);
276    }
277
278    /**
279     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
280     * <p>
281     * When encoding the line length and line separator are given in the constructor.
282     * </p>
283     * <p>
284     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
285     * </p>
286     *
287     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
288     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
289     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
290     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
291     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
292     */
293    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
294        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
295    }
296
297    /**
298     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
299     * <p>
300     * When encoding the line length and line separator are given in the constructor.
301     * </p>
302     * <p>
303     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
304     * </p>
305     *
306     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
307     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
308     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
309     * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
310     * @param padding       padding byte.
311     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
312     */
313    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
314        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
315    }
316
317    /**
318     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
319     * <p>
320     * When encoding the line length and line separator are given in the constructor.
321     * </p>
322     * <p>
323     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
324     * </p>
325     *
326     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
327     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
328     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
329     * @param useHex         if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
330     * @param padding        padding byte.
331     * @param decodingPolicy The decoding policy.
332     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
333     * @since 1.15
334     */
335    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
336        this(lineLength, lineSeparator, useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE, padding, decodingPolicy);
337    }
338
339    /**
340     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
341     * <p>
342     * When encoding the line length and line separator are given in the constructor.
343     * </p>
344     * <p>
345     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
346     * </p>
347     *
348     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
349     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
350     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
351     * @param encodeTable    A Base32 alphabet.
352     * @param padding        padding byte.
353     * @param decodingPolicy The decoding policy.
354     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
355     */
356    private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
357        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
358        Objects.requireNonNull(encodeTable, "encodeTable");
359        this.encodeTable = encodeTable;
360        this.decodeTable = encodeTable == HEX_ENCODE_TABLE ? HEX_DECODE_TABLE : DECODE_TABLE;
361        if (lineLength > 0) {
362            if (lineSeparator == null) {
363                throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
364            }
365            final byte[] lineSeparatorCopy = lineSeparator.clone();
366            // Must be done after initializing the tables
367            if (containsAlphabetOrPad(lineSeparatorCopy)) {
368                final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
369                throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
370            }
371            this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
372            this.lineSeparator = lineSeparatorCopy;
373        } else {
374            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
375            this.lineSeparator = null;
376        }
377        if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
378            throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
379        }
380    }
381
382    /**
383     * <p>
384     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
385     * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
386     * </p>
387     * <p>
388     * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are silently ignored, but has implications
389     * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
390     * </p>
391     * <p>
392     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
393     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
394     * </p>
395     *
396     * @param input   byte[] array of ASCII data to Base32 decode.
397     * @param inPos   Position to start reading data from.
398     * @param inAvail Amount of bytes available from input for decoding.
399     * @param context the context to be used
400     */
401    @Override
402    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
403        // package protected for access from I/O streams
404        if (context.eof) {
405            return;
406        }
407        if (inAvail < 0) {
408            context.eof = true;
409        }
410        final int decodeSize = this.encodeSize - 1;
411        for (int i = 0; i < inAvail; i++) {
412            final byte b = input[inPos++];
413            if (b == pad) {
414                // We're done.
415                context.eof = true;
416                break;
417            }
418            final byte[] buffer = ensureBufferSize(decodeSize, context);
419            if (b >= 0 && b < this.decodeTable.length) {
420                final int result = this.decodeTable[b];
421                if (result >= 0) {
422                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
423                    // collect decoded bytes
424                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
425                    if (context.modulus == 0) { // we can output the 5 bytes
426                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
427                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
428                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
429                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
430                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
431                    }
432                }
433            }
434        }
435        // Two forms of EOF as far as Base32 decoder is concerned: actual
436        // EOF (-1) and first time '=' character is encountered in stream.
437        // This approach makes the '=' padding characters completely optional.
438        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
439            final byte[] buffer = ensureBufferSize(decodeSize, context);
440            // We ignore partial bytes, i.e. only multiples of 8 count.
441            // Any combination not part of a valid encoding is either partially decoded
442            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
443            // It is not possible to encode with 1, 3, 6 trailing characters.
444            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
445            // See the encode(byte[]) method EOF section.
446            switch (context.modulus) {
447//              case 0 : // impossible, as excluded above
448            case 1: // 5 bits - either ignore entirely, or raise an exception
449                validateTrailingCharacters();
450            case 2: // 10 bits, drop 2 and output one byte
451                validateCharacter(MASK_2BITS, context);
452                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
453                break;
454            case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
455                validateTrailingCharacters();
456                // Not possible from a valid encoding but decode anyway
457                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
458                break;
459            case 4: // 20 bits = 2*8 + 4
460                validateCharacter(MASK_4BITS, context);
461                context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
462                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
463                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
464                break;
465            case 5: // 25 bits = 3*8 + 1
466                validateCharacter(MASK_1BITS, context);
467                context.lbitWorkArea = context.lbitWorkArea >> 1;
468                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
469                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
470                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
471                break;
472            case 6: // 30 bits = 3*8 + 6, or raise an exception
473                validateTrailingCharacters();
474                // Not possible from a valid encoding but decode anyway
475                context.lbitWorkArea = context.lbitWorkArea >> 6;
476                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
477                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
478                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
479                break;
480            case 7: // 35 bits = 4*8 +3
481                validateCharacter(MASK_3BITS, context);
482                context.lbitWorkArea = context.lbitWorkArea >> 3;
483                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
484                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
485                buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
486                buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
487                break;
488            default:
489                // modulus can be 0-7, and we excluded 0,1 already
490                throw new IllegalStateException("Impossible modulus " + context.modulus);
491            }
492        }
493    }
494
495    /**
496     * <p>
497     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
498     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
499     * </p>
500     *
501     * @param input   byte[] array of binary data to Base32 encode.
502     * @param inPos   Position to start reading data from.
503     * @param inAvail Amount of bytes available from input for encoding.
504     * @param context the context to be used
505     */
506    @Override
507    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
508        // package protected for access from I/O streams
509        if (context.eof) {
510            return;
511        }
512        // inAvail < 0 is how we're informed of EOF in the underlying data we're
513        // encoding.
514        if (inAvail < 0) {
515            context.eof = true;
516            if (0 == context.modulus && lineLength == 0) {
517                return; // no leftovers to process and not using chunking
518            }
519            final byte[] buffer = ensureBufferSize(encodeSize, context);
520            final int savedPos = context.pos;
521            switch (context.modulus) { // % 5
522            case 0:
523                break;
524            case 1: // Only 1 octet; take top 5 bits then remainder
525                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
526                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
527                buffer[context.pos++] = pad;
528                buffer[context.pos++] = pad;
529                buffer[context.pos++] = pad;
530                buffer[context.pos++] = pad;
531                buffer[context.pos++] = pad;
532                buffer[context.pos++] = pad;
533                break;
534            case 2: // 2 octets = 16 bits to use
535                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
536                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
537                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
538                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
539                buffer[context.pos++] = pad;
540                buffer[context.pos++] = pad;
541                buffer[context.pos++] = pad;
542                buffer[context.pos++] = pad;
543                break;
544            case 3: // 3 octets = 24 bits to use
545                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
546                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
547                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
548                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
549                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
550                buffer[context.pos++] = pad;
551                buffer[context.pos++] = pad;
552                buffer[context.pos++] = pad;
553                break;
554            case 4: // 4 octets = 32 bits to use
555                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
556                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
557                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
558                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
559                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
560                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
561                buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
562                buffer[context.pos++] = pad;
563                break;
564            default:
565                throw new IllegalStateException("Impossible modulus " + context.modulus);
566            }
567            context.currentLinePos += context.pos - savedPos; // keep track of current line position
568            // if currentPos == 0 we are at the start of a line, so don't add CRLF
569            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
570                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
571                context.pos += lineSeparator.length;
572            }
573        } else {
574            for (int i = 0; i < inAvail; i++) {
575                final byte[] buffer = ensureBufferSize(encodeSize, context);
576                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
577                int b = input[inPos++];
578                if (b < 0) {
579                    b += 256;
580                }
581                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
582                if (0 == context.modulus) { // we have enough bytes to create our output
583                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS];
584                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS];
585                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS];
586                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS];
587                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS];
588                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS];
589                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS];
590                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS];
591                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
592                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
593                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
594                        context.pos += lineSeparator.length;
595                        context.currentLinePos = 0;
596                    }
597                }
598            }
599        }
600    }
601
602    /**
603     * Gets the line separator (for testing only).
604     *
605     * @return the line separator.
606     */
607    byte[] getLineSeparator() {
608        return lineSeparator;
609    }
610
611    /**
612     * Returns whether or not the {@code octet} is in the Base32 alphabet.
613     *
614     * @param octet The value to test
615     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
616     */
617    @Override
618    public boolean isInAlphabet(final byte octet) {
619        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
620    }
621
622    /**
623     * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
624     * <p>
625     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
626     * that will be discarded.
627     * </p>
628     *
629     * @param emptyBitsMask The mask of the lower bits that should be empty
630     * @param context       the context to be used
631     *
632     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
633     */
634    private void validateCharacter(final long emptyBitsMask, final Context context) {
635        // Use the long bit work area
636        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
637            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
638                    "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
639        }
640    }
641
642    /**
643     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
644     *
645     * @throws IllegalArgumentException if strict decoding is enabled
646     */
647    private void validateTrailingCharacters() {
648        if (isStrictDecoding()) {
649            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
650                    "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
651        }
652    }
653}