Base32.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

import java.util.Arrays;
import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;

  20. /**
  21.  * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
  22.  *
  23.  * <p>
  24.  * The class can be parameterized in the following manner with various constructors:
  25.  * </p>
  26.  * <ul>
  27.  * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
  28.  * <li>Line length: Default 76. Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
  29.  * <li>Line separator: Default is CRLF ("\r\n")</li>
  30.  * </ul>
  31.  * <p>
  32.  * This class operates directly on byte streams, and not character streams.
  33.  * </p>
  34.  * <p>
  35.  * This class is thread-safe.
  36.  * </p>
  37.  * <p>
  38.  * You can configure instances with the {@link Builder}.
  39.  * </p>
  40.  * <pre>
  41.  * Base32 base32 = Base32.builder()
  42.  *   .setDecodingPolicy(CodecPolicy.LENIENT)    // default is lenient
  43.  *   .setEncodeTable(customEncodeTable)
  44.  *   .setLineLength(0)                          // default is none
  45.  *   .setLineSeparator('\r', '\n')              // default is CR LF
  46.  *   .setPadding('=')                           // default is =
  47.  *   .get()
  48.  * </pre>
  49.  *
  50.  * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
  51.  * @since 1.5
  52.  */
  53. public class Base32 extends BaseNCodec {

  54.     /**
  55.      * Builds {@link Base32} instances.
  56.      *
  57.      * @since 1.17.0
  58.      */
  59.     public static class Builder extends AbstractBuilder<Base32, Builder> {

  60.         /**
  61.          * Constructs a new instance.
  62.          */
  63.         public Builder() {
  64.             super(ENCODE_TABLE);
  65.         }

  66.         @Override
  67.         public Base32 get() {
  68.             return new Base32(getLineLength(), getLineSeparator(), getEncodeTable(), getPadding(), getDecodingPolicy());
  69.         }

  70.         /**
  71.          * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
  72.          * <p>
  73.          * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
  74.          * </p>
  75.          *
  76.          * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
  77.          * @return this instance.
  78.          * @since 1.18.0
  79.          */
  80.         public Builder setHexDecodeTable(final boolean useHex) {
  81.             return setEncodeTable(decodeTable(useHex));
  82.         }

  83.         /**
  84.          * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
  85.          * <p>
  86.          * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
  87.          * </p>
  88.          *
  89.          * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
  90.          * @return this instance.
  91.          * @since 1.18.0
  92.          */
  93.         public Builder setHexEncodeTable(final boolean useHex) {
  94.             return setEncodeTable(encodeTable(useHex));
  95.         }
  96.     }

  97.     /**
  98.      * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
  99.      * characters.
  100.      */
  101.     private static final int BITS_PER_ENCODED_BYTE = 5;

  102.     private static final int BYTES_PER_ENCODED_BLOCK = 8;
  103.     private static final int BYTES_PER_UNENCODED_BLOCK = 5;
  104.     /**
  105.      * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
  106.      * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
  107.      */
  108.     // @formatter:off
  109.     private static final byte[] DECODE_TABLE = {
  110.          //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  111.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
  112.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
  113.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
  114.             -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
  115.             -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
  116.             15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
  117.                                                         -1, -1, -1, -1, -1, // 5b-5f
  118.             -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
  119.             15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
  120.     };
  121.     // @formatter:on

  122.     /**
  123.      * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in Table 3 of RFC
  124.      * 4648.
  125.      */
  126.     // @formatter:off
  127.     private static final byte[] ENCODE_TABLE = {
  128.             'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  129.             'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  130.             '2', '3', '4', '5', '6', '7',
  131.     };
  132.     // @formatter:on

  133.     /**
  134.      * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
  135.      * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
  136.      */
  137.     // @formatter:off
  138.     private static final byte[] HEX_DECODE_TABLE = {
  139.          //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
  140.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
  141.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
  142.             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
  143.              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
  144.             -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
  145.             25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
  146.                                         -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
  147.             -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
  148.             25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
  149.     };
  150.     // @formatter:on

  151.     /**
  152.      * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" equivalents as specified in Table 4 of
  153.      * RFC 4648.
  154.      */
  155.     // @formatter:off
  156.     private static final byte[] HEX_ENCODE_TABLE = {
  157.             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  158.             'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  159.             'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
  160.     };
  161.     // @formatter:on

  162.     /** Mask used to extract 5 bits, used when encoding Base32 bytes */
  163.     private static final int MASK_5BITS = 0x1f;

  164.     /** Mask used to extract 4 bits, used when decoding final trailing character. */
  165.     private static final long MASK_4BITS = 0x0fL;

  166.     /** Mask used to extract 3 bits, used when decoding final trailing character. */
  167.     private static final long MASK_3BITS = 0x07L;

  168.     /** Mask used to extract 2 bits, used when decoding final trailing character. */
  169.     private static final long MASK_2BITS = 0x03L;

  170.     /** Mask used to extract 1 bits, used when decoding final trailing character. */
  171.     private static final long MASK_1BITS = 0x01L;

  172.     // The static final fields above are used for the original static byte[] methods on Base32.
  173.     // The private member fields below are used with the new streaming approach, which requires
  174.     // some state be preserved between calls of encode() and decode().

  175.     /**
  176.      * Creates a new Builder.
  177.      *
  178.      * @return a new Builder.
  179.      * @since 1.17.0
  180.      */
  181.     public static Builder builder() {
  182.         return new Builder();
  183.     }

  184.     private static byte[] decodeTable(final boolean useHex) {
  185.         return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
  186.     }

  187.     private static byte[] encodeTable(final boolean useHex) {
  188.         return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
  189.     }

  190.     /**
  191.      * Decode table to use.
  192.      */
  193.     private final byte[] decodeTable;

  194.     /**
  195.      * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
  196.      * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
  197.      */
  198.     private final int encodeSize;

  199.     /**
  200.      * Encode table to use.
  201.      */
  202.     private final byte[] encodeTable;

  203.     /**
  204.      * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
  205.      */
  206.     private final byte[] lineSeparator;

  207.     /**
  208.      * Constructs a Base32 codec used for decoding and encoding.
  209.      * <p>
  210.      * When encoding the line length is 0 (no chunking).
  211.      * </p>
  212.      */
  213.     public Base32() {
  214.         this(false);
  215.     }

  216.     /**
  217.      * Constructs a Base32 codec used for decoding and encoding.
  218.      * <p>
  219.      * When encoding the line length is 0 (no chunking).
  220.      * </p>
  221.      *
  222.      * @param useHex if {@code true} then use Base32 Hex alphabet
  223.      */
  224.     public Base32(final boolean useHex) {
  225.         this(0, null, useHex, PAD_DEFAULT);
  226.     }

  227.     /**
  228.      * Constructs a Base32 codec used for decoding and encoding.
  229.      * <p>
  230.      * When encoding the line length is 0 (no chunking).
  231.      * </p>
  232.      *
  233.      * @param useHex  if {@code true} then use Base32 Hex alphabet
  234.      * @param padding byte used as padding byte.
  235.      */
  236.     public Base32(final boolean useHex, final byte padding) {
  237.         this(0, null, useHex, padding);
  238.     }

  239.     /**
  240.      * Constructs a Base32 codec used for decoding and encoding.
  241.      * <p>
  242.      * When encoding the line length is 0 (no chunking).
  243.      * </p>
  244.      *
  245.      * @param pad byte used as padding byte.
  246.      */
  247.     public Base32(final byte pad) {
  248.         this(false, pad);
  249.     }

  250.     /**
  251.      * Constructs a Base32 codec used for decoding and encoding.
  252.      * <p>
  253.      * When encoding the line length is given in the constructor, the line separator is CRLF.
  254.      * </p>
  255.      *
  256.      * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
  257.      *                   the output will not be divided into lines (chunks). Ignored when decoding.
  258.      */
  259.     public Base32(final int lineLength) {
  260.         this(lineLength, CHUNK_SEPARATOR);
  261.     }

  262.     /**
  263.      * Constructs a Base32 codec used for decoding and encoding.
  264.      * <p>
  265.      * When encoding the line length and line separator are given in the constructor.
  266.      * </p>
  267.      * <p>
  268.      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
  269.      * </p>
  270.      *
  271.      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
  272.      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
  273.      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
  274.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
  275.      */
  276.     public Base32(final int lineLength, final byte[] lineSeparator) {
  277.         this(lineLength, lineSeparator, false, PAD_DEFAULT);
  278.     }

  279.     /**
  280.      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
  281.      * <p>
  282.      * When encoding the line length and line separator are given in the constructor.
  283.      * </p>
  284.      * <p>
  285.      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
  286.      * </p>
  287.      *
  288.      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
  289.      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
  290.      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
  291.      * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
  292.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
  293.      */
  294.     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
  295.         this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
  296.     }

  297.     /**
  298.      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
  299.      * <p>
  300.      * When encoding the line length and line separator are given in the constructor.
  301.      * </p>
  302.      * <p>
  303.      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
  304.      * </p>
  305.      *
  306.      * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
  307.      *                      then the output will not be divided into lines (chunks). Ignored when decoding.
  308.      * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
  309.      * @param useHex        if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet
  310.      * @param padding       padding byte.
  311.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
  312.      */
  313.     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
  314.         this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
  315.     }

  316.     /**
  317.      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
  318.      * <p>
  319.      * When encoding the line length and line separator are given in the constructor.
  320.      * </p>
  321.      * <p>
  322.      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
  323.      * </p>
  324.      *
  325.      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
  326.      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
  327.      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
  328.      * @param useHex         use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
  329.      * @param padding        padding byte.
  330.      * @param decodingPolicy The decoding policy.
  331.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
  332.      * @since 1.15
  333.      */
  334.     public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
  335.         this(lineLength, lineSeparator, encodeTable(useHex), padding, decodingPolicy);
  336.     }

  337.     /**
  338.      * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
  339.      * <p>
  340.      * When encoding the line length and line separator are given in the constructor.
  341.      * </p>
  342.      * <p>
  343.      * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
  344.      * </p>
  345.      *
  346.      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
  347.      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
  348.      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
  349.      * @param encodeTable    A Base32 alphabet.
  350.      * @param padding        padding byte.
  351.      * @param decodingPolicy The decoding policy.
  352.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
  353.      */
  354.     private Base32(final int lineLength, final byte[] lineSeparator, final byte[] encodeTable, final byte padding, final CodecPolicy decodingPolicy) {
  355.         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
  356.         Objects.requireNonNull(encodeTable, "encodeTable");
  357.         this.encodeTable = encodeTable;
  358.         this.decodeTable = encodeTable == HEX_ENCODE_TABLE ? HEX_DECODE_TABLE : DECODE_TABLE;
  359.         if (lineLength > 0) {
  360.             if (lineSeparator == null) {
  361.                 throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null");
  362.             }
  363.             final byte[] lineSeparatorCopy = lineSeparator.clone();
  364.             // Must be done after initializing the tables
  365.             if (containsAlphabetOrPad(lineSeparatorCopy)) {
  366.                 final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
  367.                 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
  368.             }
  369.             this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
  370.             this.lineSeparator = lineSeparatorCopy;
  371.         } else {
  372.             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
  373.             this.lineSeparator = null;
  374.         }
  375.         if (isInAlphabet(padding) || Character.isWhitespace(padding)) {
  376.             throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
  377.         }
  378.     }

  379.     /**
  380.      * <p>
  381.      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
  382.      * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
  383.      * </p>
  384.      * <p>
  385.      * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has implications
  386.      * for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
  387.      * </p>
  388.      * <p>
  389.      * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
  390.      * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
  391.      * </p>
  392.      *
  393.      * @param input   byte[] array of ASCII data to Base32 decode.
  394.      * @param inPos   Position to start reading data from.
  395.      * @param inAvail Amount of bytes available from input for decoding.
  396.      * @param context the context to be used
  397.      */
  398.     @Override
  399.     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
  400.         // package protected for access from I/O streams
  401.         if (context.eof) {
  402.             return;
  403.         }
  404.         if (inAvail < 0) {
  405.             context.eof = true;
  406.         }
  407.         final int decodeSize = this.encodeSize - 1;
  408.         for (int i = 0; i < inAvail; i++) {
  409.             final byte b = input[inPos++];
  410.             if (b == pad) {
  411.                 // We're done.
  412.                 context.eof = true;
  413.                 break;
  414.             }
  415.             final byte[] buffer = ensureBufferSize(decodeSize, context);
  416.             if (b >= 0 && b < this.decodeTable.length) {
  417.                 final int result = this.decodeTable[b];
  418.                 if (result >= 0) {
  419.                     context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
  420.                     // collect decoded bytes
  421.                     context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
  422.                     if (context.modulus == 0) { // we can output the 5 bytes
  423.                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
  424.                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
  425.                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
  426.                         buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
  427.                         buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
  428.                     }
  429.                 }
  430.             }
  431.         }
  432.         // Two forms of EOF as far as Base32 decoder is concerned: actual
  433.         // EOF (-1) and first time '=' character is encountered in stream.
  434.         // This approach makes the '=' padding characters completely optional.
  435.         if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
  436.             final byte[] buffer = ensureBufferSize(decodeSize, context);
  437.             // We ignore partial bytes, i.e. only multiples of 8 count.
  438.             // Any combination not part of a valid encoding is either partially decoded
  439.             // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
  440.             // It is not possible to encode with 1, 3, 6 trailing characters.
  441.             // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
  442.             // See the encode(byte[]) method EOF section.
  443.             switch (context.modulus) {
  444. //              case 0 : // impossible, as excluded above
  445.             case 1: // 5 bits - either ignore entirely, or raise an exception
  446.                 validateTrailingCharacters();
  447.             case 2: // 10 bits, drop 2 and output one byte
  448.                 validateCharacter(MASK_2BITS, context);
  449.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
  450.                 break;
  451.             case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
  452.                 validateTrailingCharacters();
  453.                 // Not possible from a valid encoding but decode anyway
  454.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
  455.                 break;
  456.             case 4: // 20 bits = 2*8 + 4
  457.                 validateCharacter(MASK_4BITS, context);
  458.                 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
  459.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
  460.                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
  461.                 break;
  462.             case 5: // 25 bits = 3*8 + 1
  463.                 validateCharacter(MASK_1BITS, context);
  464.                 context.lbitWorkArea = context.lbitWorkArea >> 1;
  465.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
  466.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
  467.                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
  468.                 break;
  469.             case 6: // 30 bits = 3*8 + 6, or raise an exception
  470.                 validateTrailingCharacters();
  471.                 // Not possible from a valid encoding but decode anyway
  472.                 context.lbitWorkArea = context.lbitWorkArea >> 6;
  473.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
  474.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
  475.                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
  476.                 break;
  477.             case 7: // 35 bits = 4*8 +3
  478.                 validateCharacter(MASK_3BITS, context);
  479.                 context.lbitWorkArea = context.lbitWorkArea >> 3;
  480.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
  481.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
  482.                 buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
  483.                 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
  484.                 break;
  485.             default:
  486.                 // modulus can be 0-7, and we excluded 0,1 already
  487.                 throw new IllegalStateException("Impossible modulus " + context.modulus);
  488.             }
  489.         }
  490.     }

  491.     /**
  492.      * <p>
  493.      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
  494.      * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
  495.      * </p>
  496.      *
  497.      * @param input   byte[] array of binary data to Base32 encode.
  498.      * @param inPos   Position to start reading data from.
  499.      * @param inAvail Amount of bytes available from input for encoding.
  500.      * @param context the context to be used
  501.      */
  502.     @Override
  503.     void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
  504.         // package protected for access from I/O streams
  505.         if (context.eof) {
  506.             return;
  507.         }
  508.         // inAvail < 0 is how we're informed of EOF in the underlying data we're
  509.         // encoding.
  510.         if (inAvail < 0) {
  511.             context.eof = true;
  512.             if (0 == context.modulus && lineLength == 0) {
  513.                 return; // no leftovers to process and not using chunking
  514.             }
  515.             final byte[] buffer = ensureBufferSize(encodeSize, context);
  516.             final int savedPos = context.pos;
  517.             switch (context.modulus) { // % 5
  518.             case 0:
  519.                 break;
  520.             case 1: // Only 1 octet; take top 5 bits then remainder
  521.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
  522.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
  523.                 buffer[context.pos++] = pad;
  524.                 buffer[context.pos++] = pad;
  525.                 buffer[context.pos++] = pad;
  526.                 buffer[context.pos++] = pad;
  527.                 buffer[context.pos++] = pad;
  528.                 buffer[context.pos++] = pad;
  529.                 break;
  530.             case 2: // 2 octets = 16 bits to use
  531.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
  532.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
  533.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
  534.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
  535.                 buffer[context.pos++] = pad;
  536.                 buffer[context.pos++] = pad;
  537.                 buffer[context.pos++] = pad;
  538.                 buffer[context.pos++] = pad;
  539.                 break;
  540.             case 3: // 3 octets = 24 bits to use
  541.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
  542.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
  543.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
  544.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
  545.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
  546.                 buffer[context.pos++] = pad;
  547.                 buffer[context.pos++] = pad;
  548.                 buffer[context.pos++] = pad;
  549.                 break;
  550.             case 4: // 4 octets = 32 bits to use
  551.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
  552.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
  553.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
  554.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
  555.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
  556.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
  557.                 buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
  558.                 buffer[context.pos++] = pad;
  559.                 break;
  560.             default:
  561.                 throw new IllegalStateException("Impossible modulus " + context.modulus);
  562.             }
  563.             context.currentLinePos += context.pos - savedPos; // keep track of current line position
  564.             // if currentPos == 0 we are at the start of a line, so don't add CRLF
  565.             if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
  566.                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  567.                 context.pos += lineSeparator.length;
  568.             }
  569.         } else {
  570.             for (int i = 0; i < inAvail; i++) {
  571.                 final byte[] buffer = ensureBufferSize(encodeSize, context);
  572.                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
  573.                 int b = input[inPos++];
  574.                 if (b < 0) {
  575.                     b += 256;
  576.                 }
  577.                 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
  578.                 if (0 == context.modulus) { // we have enough bytes to create our output
  579.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5BITS];
  580.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5BITS];
  581.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5BITS];
  582.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5BITS];
  583.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5BITS];
  584.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5BITS];
  585.                     buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5BITS];
  586.                     buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5BITS];
  587.                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
  588.                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
  589.                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  590.                         context.pos += lineSeparator.length;
  591.                         context.currentLinePos = 0;
  592.                     }
  593.                 }
  594.             }
  595.         }
  596.     }

  597.     /**
  598.      * Gets the line separator (for testing only).
  599.      *
  600.      * @return the line separator.
  601.      */
  602.     byte[] getLineSeparator() {
  603.         return lineSeparator;
  604.     }

  605.     /**
  606.      * Returns whether or not the {@code octet} is in the Base32 alphabet.
  607.      *
  608.      * @param octet The value to test
  609.      * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
  610.      */
  611.     @Override
  612.     public boolean isInAlphabet(final byte octet) {
  613.         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
  614.     }

  615.     /**
  616.      * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
  617.      * <p>
  618.      * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
  619.      * that will be discarded.
  620.      * </p>
  621.      *
  622.      * @param emptyBitsMask The mask of the lower bits that should be empty
  623.      * @param context       the context to be used
  624.      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
  625.      */
  626.     private void validateCharacter(final long emptyBitsMask, final Context context) {
  627.         // Use the long bit work area
  628.         if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
  629.             throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
  630.                     "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
  631.         }
  632.     }

  633.     /**
  634.      * Validates whether decoding allows final trailing characters that cannot be created during encoding.
  635.      *
  636.      * @throws IllegalArgumentException if strict decoding is enabled
  637.      */
  638.     private void validateTrailingCharacters() {
  639.         if (isStrictDecoding()) {
  640.             throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
  641.                     "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
  642.         }
  643.     }
  644. }