View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.util.Arrays;
21  
22  import org.apache.commons.codec.CodecPolicy;
23  
24  /**
25   * Provides Base32 encoding and decoding as defined by <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
26   *
27   * <p>
28   * The class can be parameterized in the following manner with various constructors:
29   * </p>
30   * <ul>
31   * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
32   * <li>Line length: Default 76. Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
33   * <li>Line separator: Default is CRLF ("\r\n")</li>
34   * </ul>
35   * <p>
36   * This class operates directly on byte streams, and not character streams.
37   * </p>
38   * <p>
39   * This class is thread-safe.
40   * </p>
41   * <p>
42   * To configure a new instance, use a {@link Builder}. For example:
43   * </p>
44   * <pre>
45   * Base32 base32 = Base32.builder()
46   *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
47   *   .setLineLength(0)                          // default is none
48   *   .setLineSeparator('\r', '\n')              // default is CR LF
49   *   .setPadding('=')                           // default is '='
50   *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
51   *   .get();
52   * </pre>
53   *
54   * @see Base32InputStream
55   * @see Base32OutputStream
56   * @see <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
57   * @since 1.5
58   */
59  public class Base32 extends BaseNCodec {
60  
61      /**
62       * Builds {@link Base32} instances.
63       *
64       * <p>
65       * To configure a new instance, use a {@link Builder}. For example:
66       * </p>
67       *
68       * <pre>
69       * Base32 base32 = Base32.builder()
70       *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient
71       *   .setLineLength(0)                          // default is none
72       *   .setLineSeparator('\r', '\n')              // default is CR LF
73       *   .setPadding('=')                           // default is '='
74       *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
75       *   .get()
76       * </pre>
77       *
78       * @since 1.17.0
79       */
80      public static class Builder extends AbstractBuilder<Base32, Builder> {
81  
82          /**
83           * Constructs a new instance using <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
84           * Alphabet</a>.
85           */
86          public Builder() {
87              super(ENCODE_TABLE);
88              setDecodeTableRaw(DECODE_TABLE);
89              setEncodeTableRaw(ENCODE_TABLE);
90              setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
91              setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
92          }
93  
94          @Override
95          public Base32 get() {
96              return new Base32(this);
97          }
98  
99          @Override
100         public Builder setEncodeTable(final byte... encodeTable) {
101             super.setDecodeTableRaw(Arrays.equals(encodeTable, HEX_ENCODE_TABLE) ? HEX_DECODE_TABLE : DECODE_TABLE);
102             return super.setEncodeTable(encodeTable);
103         }
104 
105         /**
106          * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
107          * <p>
108          * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
109          * </p>
110          *
111          * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
112          * @return {@code this} instance.
113          * @since 1.18.0
114          */
115         public Builder setHexDecodeTable(final boolean useHex) {
116             return setEncodeTable(decodeTable(useHex));
117         }
118 
119         /**
120          * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
121          * <p>
122          * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
123          * </p>
124          *
125          * @param useHex
126          *               <ul>
127          *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding
128          *               with Extended Hex Alphabet</a></li>
129          *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
130          *               Alphabet</a></li>
131          *               </ul>
132          * @return {@code this} instance.
133          * @since 1.18.0
134          */
135         public Builder setHexEncodeTable(final boolean useHex) {
136             return setEncodeTable(encodeTable(useHex));
137         }
138     }
139 
    /**
     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
     * characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 5;

    /** Number of Base32 characters in one full encoded block; also the modulus used to count pending characters while decoding. */
    private static final int BYTES_PER_ENCODED_BLOCK = 8;

    /** Number of octets represented by one full encoded block. */
    private static final int BYTES_PER_UNENCODED_BLOCK = 5;
148 
    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
     * integer equivalents (0 to 31). Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * The table covers the code points 0x00 to 0x7a. Lower-case letters map to the same values as their upper-case counterparts.
     * </p>
     */
    // @formatter:off
    private static final byte[] DECODE_TABLE = {
         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
                                                        -1, -1, -1, -1, -1, // 5b-5f
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit integer index values (0 to 31) into their "Base32 Alphabet" equivalents as specified in
     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>.
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>
     */
    // @formatter:off
    private static final byte[] ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            '2', '3', '4', '5', '6', '7',
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
     * 5-bit integer equivalents (0 to 31). Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * The table covers the code points 0x00 to 0x76. Lower-case letters map to the same values as their upper-case counterparts.
     * </p>
     */
    // @formatter:off
    private static final byte[] HEX_DECODE_TABLE = {
         //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit integer index values (0 to 31) into their "Base 32 Encoding with Extended Hex Alphabet" equivalents
     * as specified in <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex
     * Alphabet</a>.
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex Alphabet</a>
     */
    // @formatter:off
    private static final byte[] HEX_ENCODE_TABLE = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    };
    // @formatter:on
215 
    /** Mask used to extract 5 bits, used when encoding Base32 bytes to index into the encode table. */
    private static final int MASK_5_BITS = 0x1f;

    /** Mask used to extract 4 bits, used when decoding a final trailing character (4 pending characters). */
    private static final long MASK_4_BITS = 0x0fL;

    /** Mask used to extract 3 bits, used when decoding a final trailing character (7 pending characters). */
    private static final long MASK_3_BITS = 0x07L;

    /** Mask used to extract 2 bits, used when decoding a final trailing character (1 or 2 pending characters). */
    private static final long MASK_2_BITS = 0x03L;

    /** Mask used to extract 1 bit, used when decoding a final trailing character (5 pending characters). */
    private static final long MASK_1_BITS = 0x01L;
230 
231     // The static final fields above are used for the original static byte[] methods on Base32.
232     // The private member fields below are used with the new streaming approach, which requires
233     // some state be preserved between calls of encode() and decode().
234 
    /**
     * Creates a new Builder.
     *
     * <p>
     * To configure a new instance, use a {@link Builder}. For example:
     * </p>
     *
     * <pre>
     * Base32 base32 = Base32.builder()
     *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
     *   .setLineLength(0)                          // default is none
     *   .setLineSeparator('\r', '\n')              // default is CR LF
     *   .setPadding('=')                           // default is '='
     *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
     *   .get();
     * </pre>
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }
258 
259     private static byte[] decodeTable(final boolean useHex) {
260         return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
261     }
262 
263     /**
264      * Gets the encoding table that matches {@code useHex}.
265      *
266      * @param useHex
267      *               <ul>
268      *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
269      *               Extended Hex Alphabet</a></li>
270      *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
271      *               Alphabet</a></li>
272      *               </ul>
273      * @return the encoding table that matches {@code useHex}.
274      */
275     private static byte[] encodeTable(final boolean useHex) {
276         return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
277     }
278 
    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. When the line length is greater than zero
     * this is {@link #BYTES_PER_ENCODED_BLOCK} plus the line separator length, otherwise it is {@link #BYTES_PER_ENCODED_BLOCK}. Decoding requests one byte
     * less than this value.
     */
    private final int encodeSize;

    /**
     * Line separator for encoding. Not used when decoding. Only set if lineLength &gt; 0, otherwise {@code null}.
     */
    private final byte[] lineSeparator;
289 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     * <p>
     * Equivalent to {@code new Base32(false)}: the <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The
     * Base 32 Alphabet</a> and the default padding byte are used.
     * </p>
     */
    public Base32() {
        this(false);
    }
299 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking). The default padding byte is used.
     * </p>
     *
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final boolean useHex) {
        // Line length 0 and a null separator: no chunking.
        this(0, null, useHex, PAD_DEFAULT);
    }
319 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @param padding byte used as padding byte.
     * @throws IllegalArgumentException Thrown when the {@code padding} byte is in the alphabet or is whitespace.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final boolean useHex, final byte padding) {
        // Line length 0 and a null separator: no chunking.
        this(0, null, useHex, padding);
    }
340 
341     private Base32(final Builder builder) {
342         super(builder);
343         if (builder.getLineLength() > 0) {
344             final byte[] lineSeparator = builder.getLineSeparator();
345             // Must be done after initializing the tables
346             if (containsAlphabetOrPad(lineSeparator)) {
347                 final String sep = StringUtils.newStringUtf8(lineSeparator);
348                 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
349             }
350             this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
351             this.lineSeparator = lineSeparator;
352         } else {
353             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
354             this.lineSeparator = null;
355         }
356         if (isInAlphabet(builder.getPadding()) || Character.isWhitespace(builder.getPadding())) {
357             throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
358         }
359     }
360 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking). The <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table
     * 3: The Base 32 Alphabet</a> is used.
     * </p>
     *
     * @param pad byte used as padding byte.
     * @throws IllegalArgumentException Thrown when the {@code pad} byte is in the alphabet or is whitespace.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final byte pad) {
        this(false, pad);
    }
374 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF ({@code CHUNK_SEPARATOR}).
     * </p>
     *
     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
     *                   the output will not be divided into lines (chunks). Ignored when decoding.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }
389 
    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor. The
     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a> and the default padding byte
     * are used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator) {
        this(lineLength, lineSeparator, false, PAD_DEFAULT);
    }
409 
    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor. The default padding byte is used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced with an empty array further
     *                      down the constructor chain.
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
    }
436 
    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor. The default decoding policy ({@code DECODING_POLICY_DEFAULT}) is used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced with an empty array further
     *                      down the constructor chain.
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @param padding       padding byte.
     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters. Or the {@code padding} byte
     *                                  is in the alphabet or is whitespace.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
    }
464 
    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     * <p>
     * The arguments are applied to a new {@link Builder}, which is then used to construct this instance.
     * </p>
     *
     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced with an empty array.
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @param padding        padding byte.
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException Thrown when lineLength &gt; 0 and the {@code lineSeparator} contains Base32 characters. Or the {@code padding} byte
     *                                  is in the alphabet or is whitespace.
     * @since 1.15
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
        // @formatter:off
        this(builder()
                .setLineLength(lineLength)
                // A null separator is passed on as an empty array.
                .setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY)
                // Decode and encode tables are selected from the same flag so they stay paired.
                .setDecodeTable(decodeTable(useHex))
                .setEncodeTableRaw(encodeTable(useHex))
                .setPadding(padding)
                .setDecodingPolicy(decodingPolicy));
        // @formatter:on
    }
502 
503     /**
504      * <p>
505      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
506      * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
507      * </p>
508      * <p>
509      * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
510      * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
511      * </p>
512      * <p>
513      * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
514      * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
515      * </p>
516      *
517      * @param input   byte[] array of ASCII data to Base32 decode.
518      * @param inPos   Position to start reading data from.
519      * @param inAvail Amount of bytes available from input for decoding.
520      * @param context the context to be used
521      */
522     @Override
523     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
524         // package protected for access from I/O streams
525         if (context.eof) {
526             return;
527         }
528         if (inAvail < 0) {
529             context.eof = true;
530         }
531         final int decodeSize = this.encodeSize - 1;
532         for (int i = 0; i < inAvail; i++) {
533             final byte b = input[inPos++];
534             if (b == pad) {
535                 // We're done.
536                 context.eof = true;
537                 break;
538             }
539             final byte[] buffer = ensureBufferSize(decodeSize, context);
540             if (b >= 0 && b < this.decodeTable.length) {
541                 final int result = this.decodeTable[b];
542                 if (result >= 0) {
543                     context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
544                     // collect decoded bytes
545                     context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
546                     if (context.modulus == 0) { // we can output the 5 bytes
547                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
548                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
549                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
550                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
551                         buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
552                     }
553                 }
554             }
555         }
556         // Two forms of EOF as far as Base32 decoder is concerned: actual
557         // EOF (-1) and first time '=' character is encountered in stream.
558         // This approach makes the '=' padding characters completely optional.
559         if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
560             final byte[] buffer = ensureBufferSize(decodeSize, context);
561             // We ignore partial bytes, i.e. only multiples of 8 count.
562             // Any combination not part of a valid encoding is either partially decoded
563             // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
564             // It is not possible to encode with 1, 3, 6 trailing characters.
565             // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
566             // See the encode(byte[]) method EOF section.
567             switch (context.modulus) {
568 //              case 0 : // impossible, as excluded above
569             case 1: // 5 bits - either ignore entirely, or raise an exception
570                 validateTrailingCharacters();
571                 // falls-through
572             case 2: // 10 bits, drop 2 and output one byte
573                 validateCharacter(MASK_2_BITS, context);
574                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
575                 break;
576             case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
577                 validateTrailingCharacters();
578                 // Not possible from a valid encoding but decode anyway
579                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
580                 break;
581             case 4: // 20 bits = 2*8 + 4
582                 validateCharacter(MASK_4_BITS, context);
583                 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
584                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
585                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
586                 break;
587             case 5: // 25 bits = 3*8 + 1
588                 validateCharacter(MASK_1_BITS, context);
589                 context.lbitWorkArea = context.lbitWorkArea >> 1;
590                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
591                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
592                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
593                 break;
594             case 6: // 30 bits = 3*8 + 6, or raise an exception
595                 validateTrailingCharacters();
596                 // Not possible from a valid encoding but decode anyway
597                 context.lbitWorkArea = context.lbitWorkArea >> 6;
598                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
599                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
600                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
601                 break;
602             case 7: // 35 bits = 4*8 +3
603                 validateCharacter(MASK_3_BITS, context);
604                 context.lbitWorkArea = context.lbitWorkArea >> 3;
605                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
606                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
607                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
608                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
609                 break;
610             default:
611                 // modulus can be 0-7, and we excluded 0,1 already
612                 throw new IllegalStateException("Impossible modulus " + context.modulus);
613             }
614         }
615     }
616 
617     /**
618      * <p>
619      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
620      * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
621      * </p>
622      *
623      * @param input   byte[] array of binary data to Base32 encode.
624      * @param inPos   Position to start reading data from.
625      * @param inAvail Amount of bytes available from input for encoding.
626      * @param context the context to be used
627      */
628     @Override
629     void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
630         // package protected for access from I/O streams
631         if (context.eof) {
632             return;
633         }
634         // inAvail < 0 is how we're informed of EOF in the underlying data we're
635         // encoding.
636         if (inAvail < 0) {
637             context.eof = true;
638             if (0 == context.modulus && lineLength == 0) {
639                 return; // no leftovers to process and not using chunking
640             }
641             final byte[] buffer = ensureBufferSize(encodeSize, context);
642             final int savedPos = context.pos;
643             switch (context.modulus) { // % 5
644             case 0:
645                 break;
646             case 1: // Only 1 octet; take top 5 bits then remainder
647                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5_BITS]; // 8-1*5 = 3
648                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5_BITS]; // 5-3=2
649                 buffer[context.pos++] = pad;
650                 buffer[context.pos++] = pad;
651                 buffer[context.pos++] = pad;
652                 buffer[context.pos++] = pad;
653                 buffer[context.pos++] = pad;
654                 buffer[context.pos++] = pad;
655                 break;
656             case 2: // 2 octets = 16 bits to use
657                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5_BITS]; // 16-1*5 = 11
658                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5_BITS]; // 16-2*5 = 6
659                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5_BITS]; // 16-3*5 = 1
660                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5_BITS]; // 5-1 = 4
661                 buffer[context.pos++] = pad;
662                 buffer[context.pos++] = pad;
663                 buffer[context.pos++] = pad;
664                 buffer[context.pos++] = pad;
665                 break;
666             case 3: // 3 octets = 24 bits to use
667                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5_BITS]; // 24-1*5 = 19
668                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5_BITS]; // 24-2*5 = 14
669                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5_BITS]; // 24-3*5 = 9
670                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5_BITS]; // 24-4*5 = 4
671                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5_BITS]; // 5-4 = 1
672                 buffer[context.pos++] = pad;
673                 buffer[context.pos++] = pad;
674                 buffer[context.pos++] = pad;
675                 break;
676             case 4: // 4 octets = 32 bits to use
677                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5_BITS]; // 32-1*5 = 27
678                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5_BITS]; // 32-2*5 = 22
679                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5_BITS]; // 32-3*5 = 17
680                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5_BITS]; // 32-4*5 = 12
681                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5_BITS]; // 32-5*5 = 7
682                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5_BITS]; // 32-6*5 = 2
683                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5_BITS]; // 5-2 = 3
684                 buffer[context.pos++] = pad;
685                 break;
686             default:
687                 throw new IllegalStateException("Impossible modulus " + context.modulus);
688             }
689             context.currentLinePos += context.pos - savedPos; // keep track of current line position
690             // if currentPos == 0 we are at the start of a line, so don't add CRLF
691             if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
692                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
693                 context.pos += lineSeparator.length;
694             }
695         } else {
696             for (int i = 0; i < inAvail; i++) {
697                 final byte[] buffer = ensureBufferSize(encodeSize, context);
698                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
699                 int b = input[inPos++];
700                 if (b < 0) {
701                     b += 256;
702                 }
703                 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
704                 if (0 == context.modulus) { // we have enough bytes to create our output
705                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5_BITS];
706                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5_BITS];
707                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5_BITS];
708                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5_BITS];
709                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5_BITS];
710                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5_BITS];
711                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5_BITS];
712                     buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5_BITS];
713                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
714                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
715                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
716                         context.pos += lineSeparator.length;
717                         context.currentLinePos = 0;
718                     }
719                 }
720             }
721         }
722     }
723 
724     /**
725      * Gets the line separator (for testing only).
726      *
727      * @return the line separator.
728      */
729     byte[] getLineSeparator() {
730         return lineSeparator;
731     }
732 
733     /**
734      * Returns whether or not the {@code octet} is in the Base32 alphabet.
735      *
736      * @param octet The value to test
737      * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
738      */
739     @Override
740     public boolean isInAlphabet(final byte octet) {
741         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
742     }
743 
744     /**
745      * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
746      * <p>
747      * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
748      * that will be discarded.
749      * </p>
750      *
751      * @param emptyBitsMask The mask of the lower bits that should be empty
752      * @param context       the context to be used
753      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
754      */
755     private void validateCharacter(final long emptyBitsMask, final Context context) {
756         // Use the long bit work area
757         if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
758             throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
759                     "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
760         }
761     }
762 
763     /**
764      * Validates whether decoding allows final trailing characters that cannot be created during encoding.
765      *
766      * @throws IllegalArgumentException if strict decoding is enabled
767      */
768     private void validateTrailingCharacters() {
769         if (isStrictDecoding()) {
770             throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
771                     "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
772         }
773     }
774 }