Base64.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.math.BigInteger;
  19. import java.util.Arrays;
  20. import java.util.Objects;

  21. import org.apache.commons.codec.CodecPolicy;

  22. /**
  23.  * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
  24.  *
  25.  * <p>
  26.  * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
  27.  * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
  28.  * </p>
  29.  * <p>
  30.  * The class can be parameterized in the following manner with various constructors:
  31.  * </p>
  32.  * <ul>
  33.  * <li>URL-safe mode: Default off.</li>
  34.  * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
  35.  * 4 in the encoded data.</li>
  36.  * <li>Line separator: Default is CRLF ("\r\n")</li>
  37.  * </ul>
  38.  * <p>
  39.  * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
  40.  * </p>
  41.  * <p>
  42.  * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
  43.  * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
  44.  * UTF-8, etc).
  45.  * </p>
  46.  * <p>
  47.  * This class is thread-safe.
  48.  * </p>
  49.  * <p>
  50.  * You can configure instances with the {@link Builder}.
  51.  * </p>
  52.  * <pre>
  53.  * Base64 base64 = Base64.builder()
  54.  *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient, null resets to default
  55.  *   .setEncodeTable(customEncodeTable)         // default is built in, null resets to default
  56.  *   .setLineLength(0)                          // default is none
  57.  *   .setLineSeparator('\r', '\n')              // default is CR LF, null resets to default
  58.  *   .setPadding('=')                           // default is =
  59.  *   .setUrlSafe(false)                         // default is false
  60.  *   .get()
  61.  * </pre>
  62.  *
  63.  * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
  64.  * @since 1.0
  65.  */
  66. public class Base64 extends BaseNCodec {

  67.     /**
  68.      * Builds {@link Base64} instances.
  69.      *
  70.      * @since 1.17.0
  71.      */
  72.     public static class Builder extends AbstractBuilder<Base64, Builder> {

  73.         /**
  74.          * Constructs a new instance.
  75.          */
  76.         public Builder() {
  77.             super(STANDARD_ENCODE_TABLE);
  78.         }

  79.         @Override
  80.         public Base64 get() {
  81.             return new Base64(getLineLength(), getLineSeparator(), getPadding(), getEncodeTable(), getDecodingPolicy());
  82.         }

  83.         /**
  84.          * Sets the URL-safe encoding policy.
  85.          *
  86.          * @param urlSafe URL-safe encoding policy, null resets to the default.
  87.          * @return {@code this} instance.
  88.          */
  89.         public Builder setUrlSafe(final boolean urlSafe) {
  90.             return setEncodeTable(toUrlSafeEncodeTable(urlSafe));
  91.         }

  92.     }

    /**
     * BASE64 characters are 6 bits in length.
     * They are formed by taking a block of 3 octets to form a 24-bit string,
     * which is converted into 4 BASE64 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 6;
    /** Number of raw octets in one unencoded block (3 octets = 24 bits). */
    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
    /** Number of Base64 characters produced for one unencoded block (4 x 6 bits = 24 bits). */
    private static final int BYTES_PER_ENCODED_BLOCK = 4;
    /** NOTE(review): presumably the length of decode tables computed for custom alphabets - confirm against its use. */
    private static final int DECODING_TABLE_LENGTH = 256;

    /**
     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" equivalents as specified in Table 1 of RFC
     * 2045. The array index is the 6-bit value; the element is the character emitted for it.
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code. https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    // @formatter:off
    private static final byte[] STANDARD_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };
    /**
     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / changed to - and _ to make the encoded Base64 results more URL-SAFE. This table is
     * only used when the Base64's mode is set to URL-SAFE. Only the last two entries (indexes 62 and 63) differ from the standard table.
     */
    // @formatter:off
    private static final byte[] URL_SAFE_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };
    // @formatter:on
    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
     * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
     * alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * The table stops at 0x7a ('z'), so it has 123 entries; callers must range-check a byte before indexing.
     * </p>
     * <p>
     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
     * </p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code.
     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    private static final byte[] DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
    };

    /**
     * Mask used to extract 6 bits, used when encoding. Base64 uses 6-bit fields.
     */
    private static final int MASK_6_BITS = 0x3f;

    // The static final fields above are used for the original static byte[] methods on Base64.
    // The private member fields below are used with the new streaming approach, which requires
    // some state be preserved between calls of encode() and decode().
    // NOTE(review): the two masks that follow are static as well; the comment above appears to predate them.

    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final int MASK_4_BITS = 0xf;
    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final int MASK_2_BITS = 0x3;

    /**
     * Creates a new {@link Builder} for configuring {@link Base64} instances.
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }

  175.     /**
  176.      * Decodes Base64 data into octets.
  177.      * <p>
  178.      * <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
  179.      * </p>
  180.      *
  181.      * @param base64Data
  182.      *            Byte array containing Base64 data
  183.      * @return Array containing decoded data.
  184.      */
  185.     public static byte[] decodeBase64(final byte[] base64Data) {
  186.         return new Base64().decode(base64Data);
  187.     }

  188.     /**
  189.      * Decodes a Base64 String into octets.
  190.      * <p>
  191.      * <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
  192.      * </p>
  193.      *
  194.      * @param base64String
  195.      *            String containing Base64 data
  196.      * @return Array containing decoded data.
  197.      * @since 1.4
  198.      */
  199.     public static byte[] decodeBase64(final String base64String) {
  200.         return new Base64().decode(base64String);
  201.     }

  202.     /**
  203.      * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
  204.      *
  205.      * @param array
  206.      *            a byte array containing base64 character data
  207.      * @return A BigInteger
  208.      * @since 1.4
  209.      */
  210.     public static BigInteger decodeInteger(final byte[] array) {
  211.         return new BigInteger(1, decodeBase64(array));
  212.     }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean)} with chunking disabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     */
    public static byte[] encodeBase64(final byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean)} with URL-safe mode disabled.
     * </p>
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
        return encodeBase64(binaryData, isChunked, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean, int)} with {@link Integer#MAX_VALUE} as the
     * maximum result size.
     * </p>
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @param urlSafe
     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
     *            <strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong>
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     * @since 1.4
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
    }

  255.     /**
  256.      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
  257.      *
  258.      * @param binaryData
  259.      *            Array containing binary data to encode.
  260.      * @param isChunked
  261.      *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
  262.      * @param urlSafe
  263.      *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
  264.      *            <strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong>
  265.      * @param maxResultSize
  266.      *            The maximum result size to accept.
  267.      * @return Base64-encoded data.
  268.      * @throws IllegalArgumentException
  269.      *             Thrown when the input array needs an output array bigger than maxResultSize
  270.      * @since 1.4
  271.      */
  272.     public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
  273.                                       final boolean urlSafe, final int maxResultSize) {
  274.         if (BinaryCodec.isEmpty(binaryData)) {
  275.             return binaryData;
  276.         }
  277.         // Create this so can use the super-class method
  278.         // Also ensures that the same roundings are performed by the ctor and the code
  279.         final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
  280.         final long len = b64.getEncodedLength(binaryData);
  281.         if (len > maxResultSize) {
  282.             throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
  283.                 len +
  284.                 ") than the specified maximum size of " +
  285.                 maxResultSize);
  286.         }
  287.         return b64.encode(binaryData);
  288.     }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean)} with chunking enabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

  299.     /**
  300.      * Encodes binary data using the base64 algorithm but does not chunk the output.
  301.      *
  302.      * NOTE:  We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
  303.      * single-line non-chunking (commons-codec-1.5).
  304.      *
  305.      * @param binaryData
  306.      *            binary data to encode
  307.      * @return String containing Base64 characters.
  308.      * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
  309.      */
  310.     public static String encodeBase64String(final byte[] binaryData) {
  311.         return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
  312.     }

    /**
     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
     * url-safe variation emits - and _ instead of + and / characters.
     * <strong>Note: No padding is added.</strong>
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean)} with chunking disabled and URL-safe mode enabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     * @since 1.4
     */
    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
        return encodeBase64(binaryData, false, true);
    }

  325.     /**
  326.      * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
  327.      * url-safe variation emits - and _ instead of + and / characters.
  328.      * <strong>Note: No padding is added.</strong>
  329.      * @param binaryData
  330.      *            binary data to encode
  331.      * @return String containing Base64 characters
  332.      * @since 1.4
  333.      */
  334.     public static String encodeBase64URLSafeString(final byte[] binaryData) {
  335.         return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
  336.     }

  337.     /**
  338.      * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
  339.      *
  340.      * @param bigInteger
  341.      *            a BigInteger
  342.      * @return A byte array containing base64 character data
  343.      * @throws NullPointerException
  344.      *             if null is passed in
  345.      * @since 1.4
  346.      */
  347.     public static byte[] encodeInteger(final BigInteger bigInteger) {
  348.         Objects.requireNonNull(bigInteger, "bigInteger");
  349.         return encodeBase64(toIntegerBytes(bigInteger), false);
  350.     }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     * <p>
     * This is a plain alias that forwards to {@link #isBase64(byte[])}.
     * </p>
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
     */
    @Deprecated
    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
        return isBase64(arrayOctet);
    }

  365.     /**
  366.      * Returns whether or not the {@code octet} is in the base 64 alphabet.
  367.      *
  368.      * @param octet
  369.      *            The value to test
  370.      * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
  371.      * @since 1.4
  372.      */
  373.     public static boolean isBase64(final byte octet) {
  374.         return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
  375.     }

  376.     /**
  377.      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
  378.      * method treats whitespace as valid.
  379.      *
  380.      * @param arrayOctet
  381.      *            byte array to test
  382.      * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
  383.      *         {@code false}, otherwise
  384.      * @since 1.5
  385.      */
  386.     public static boolean isBase64(final byte[] arrayOctet) {
  387.         for (final byte element : arrayOctet) {
  388.             if (!isBase64(element) && !Character.isWhitespace(element)) {
  389.                 return false;
  390.             }
  391.         }
  392.         return true;
  393.     }

  394.     /**
  395.      * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
  396.      * method treats whitespace as valid.
  397.      *
  398.      * @param base64
  399.      *            String to test
  400.      * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
  401.      *         the String is empty; {@code false}, otherwise
  402.      *  @since 1.5
  403.      */
  404.     public static boolean isBase64(final String base64) {
  405.         return isBase64(StringUtils.getBytesUtf8(base64));
  406.     }

  407.     /**
  408.      * Returns a byte-array representation of a {@code BigInteger} without sign bit.
  409.      *
  410.      * @param bigInt
  411.      *            {@code BigInteger} to be converted
  412.      * @return a byte array representation of the BigInteger parameter
  413.      */
  414.     static byte[] toIntegerBytes(final BigInteger bigInt) {
  415.         int bitlen = bigInt.bitLength();
  416.         // round bitlen
  417.         bitlen = bitlen + 7 >> 3 << 3;
  418.         final byte[] bigBytes = bigInt.toByteArray();

  419.         if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) {
  420.             return bigBytes;
  421.         }
  422.         // set up params for copying everything but sign bit
  423.         int startSrc = 0;
  424.         int len = bigBytes.length;

  425.         // if bigInt is exactly byte-aligned, just skip signbit in copy
  426.         if (bigInt.bitLength() % 8 == 0) {
  427.             startSrc = 1;
  428.             len--;
  429.         }
  430.         final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
  431.         final byte[] resizedBytes = new byte[bitlen / 8];
  432.         System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
  433.         return resizedBytes;
  434.     }

  435.     private static byte[] toUrlSafeEncodeTable(final boolean urlSafe) {
  436.         return urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
  437.     }

    /**
     * Encode table to use: either STANDARD or URL_SAFE or custom.
     * Note: the DECODE_TABLE above remains static because it is able
     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
     * between the two modes.
     */
    private final byte[] encodeTable;

    /**
     * Decode table to use: the shared static DECODE_TABLE for the built-in encode tables, otherwise a table
     * calculated from the custom encode table.
     */
    private final byte[] decodeTable;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0; {@code null} when no
     * separator was supplied or when lineLength &lt;= 0.
     */
    private final byte[] lineSeparator;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * {@code encodeSize = 4 + lineSeparator.length;} when chunking is active, otherwise {@code 4}.
     */
    private final int encodeSize;

    /**
     * Whether the encode table is the URL-safe table, either the same array or one with equal contents.
     */
    private final boolean isUrlSafe;

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     */
    public Base64() {
        // Line length 0 disables chunking.
        this(0);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is
     * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true}, otherwise STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param urlSafe
     *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
     *            {@code false}.
     * @since 1.4
     */
    public Base64(final boolean urlSafe) {
        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @since 1.4
     */
    public Base64(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException
     *             Thrown when the provided {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator) {
        // URL-safe mode off: the standard alphabet is used for encoding.
        this(lineLength, lineSeparator, false);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true}, otherwise STANDARD_ENCODE_TABLE. The default
     * padding byte and the default decoding policy are used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <strong>Note: No padding is added when using the URL-safe alphabet.</strong>
     * @throws IllegalArgumentException
     *             Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
        this(lineLength, lineSeparator, PAD_DEFAULT, toUrlSafeEncodeTable(urlSafe), DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode, with an
     * explicit decoding policy.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE when {@code urlSafe} is {@code true}, otherwise STANDARD_ENCODE_TABLE. The default
     * padding byte is used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <strong>Note: No padding is added when using the URL-safe alphabet.</strong>
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException
     *             Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.15
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, final CodecPolicy decodingPolicy) {
        this(lineLength, lineSeparator, PAD_DEFAULT, toUrlSafeEncodeTable(urlSafe), decodingPolicy);
    }

  597.     /**
  598.      * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
  599.      * <p>
  600.      * When encoding the line length and line separator are given in the constructor, and the encoding table is STANDARD_ENCODE_TABLE.
  601.      * </p>
  602.      * <p>
  603.      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
  604.      * </p>
  605.      * <p>
  606.      * When decoding all variants are supported.
  607.      * </p>
  608.      *
  609.      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If lineLength &lt;= 0,
  610.      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
  611.      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes; the constructor makes a defensive copy. May be null.
  612.      * @param padding        padding byte.
  613.      * @param encodeTable    The manual encodeTable - a byte array of 64 chars.
  614.      * @param decodingPolicy The decoding policy.
  615.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters.
  616.      */
  617.     private Base64(final int lineLength, final byte[] lineSeparator, final byte padding, final byte[] encodeTable, final CodecPolicy decodingPolicy) {
  618.         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
  619.         Objects.requireNonNull(encodeTable, "encodeTable");
  620.         if (encodeTable.length != STANDARD_ENCODE_TABLE.length) {
  621.             throw new IllegalArgumentException("encodeTable must have exactly 64 entries.");
  622.         }
  623.         // same array first or equal contents second
  624.         this.isUrlSafe = encodeTable == URL_SAFE_ENCODE_TABLE || Arrays.equals(encodeTable, URL_SAFE_ENCODE_TABLE);
  625.         if (encodeTable == STANDARD_ENCODE_TABLE || this.isUrlSafe) {
  626.             decodeTable = DECODE_TABLE;
  627.             // No need of a defensive copy of an internal table.
  628.             this.encodeTable = encodeTable;
  629.         } else {
  630.             this.encodeTable = encodeTable.clone();
  631.             this.decodeTable = calculateDecodeTable(this.encodeTable);
  632.         }
  633.         // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
  634.         // @see test case Base64Test.testConstructors()
  635.         if (lineSeparator != null) {
  636.             final byte[] lineSeparatorCopy = lineSeparator.clone();
  637.             if (containsAlphabetOrPad(lineSeparatorCopy)) {
  638.                 final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
  639.                 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
  640.             }
  641.             if (lineLength > 0) { // null line-sep forces no chunking rather than throwing IAE
  642.                 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
  643.                 this.lineSeparator = lineSeparatorCopy;
  644.             } else {
  645.                 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
  646.                 this.lineSeparator = null;
  647.             }
  648.         } else {
  649.             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
  650.             this.lineSeparator = null;
  651.         }
  652.     }

  653.     /**
  654.      * Calculates a decode table for a given encode table.
  655.      *
  656.      * @param encodeTable that is used to determine decode lookup table
  657.      * @return decodeTable
  658.      */
  659.     private byte[] calculateDecodeTable(final byte[] encodeTable) {
  660.         final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH];
  661.         Arrays.fill(decodeTable, (byte) -1);
  662.         for (int i = 0; i < encodeTable.length; i++) {
  663.             decodeTable[encodeTable[i]] = (byte) i;
  664.         }
  665.         return decodeTable;
  666.     }

  667.     /**
  668.      * <p>
  669.      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
  670.      * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
  671.      * call is not necessary when decoding, but it doesn't hurt, either.
  672.      * </p>
  673.      * <p>
  674.      * Ignores all non-base64 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are
  675.      * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
  676.      * garbage-out philosophy: it will not check the provided data for validity.
  677.      * </p>
  678.      * <p>
  679.      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
  680.      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
  681.      * </p>
  682.      *
  683.      * @param input
  684.      *            byte[] array of ASCII data to base64 decode.
  685.      * @param inPos
  686.      *            Position to start reading data from.
  687.      * @param inAvail
  688.      *            Amount of bytes available from input for decoding.
  689.      * @param context
  690.      *            the context to be used
  691.      */
  692.     @Override
  693.     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
  694.         if (context.eof) {
  695.             return;
  696.         }
  697.         if (inAvail < 0) {
  698.             context.eof = true;
  699.         }
  700.         final int decodeSize = this.encodeSize - 1;
  701.         for (int i = 0; i < inAvail; i++) {
  702.             final byte[] buffer = ensureBufferSize(decodeSize, context);
  703.             final byte b = input[inPos++];
  704.             if (b == pad) {
  705.                 // We're done.
  706.                 context.eof = true;
  707.                 break;
  708.             }
  709.             if (b >= 0 && b < decodeTable.length) {
  710.                 final int result = decodeTable[b];
  711.                 if (result >= 0) {
  712.                     context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
  713.                     context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
  714.                     if (context.modulus == 0) {
  715.                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS);
  716.                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
  717.                         buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  718.                     }
  719.                 }
  720.             }
  721.         }

  722.         // Two forms of EOF as far as base64 decoder is concerned: actual
  723.         // EOF (-1) and first time '=' character is encountered in stream.
  724.         // This approach makes the '=' padding characters completely optional.
  725.         if (context.eof && context.modulus != 0) {
  726.             final byte[] buffer = ensureBufferSize(decodeSize, context);

  727.             // We have some spare bits remaining
  728.             // Output all whole multiples of 8 bits and ignore the rest
  729.             switch (context.modulus) {
  730. //              case 0 : // impossible, as excluded above
  731.                 case 1 : // 6 bits - either ignore entirely, or raise an exception
  732.                     validateTrailingCharacter();
  733.                     break;
  734.                 case 2 : // 12 bits = 8 + 4
  735.                     validateCharacter(MASK_4_BITS, context);
  736.                     context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
  737.                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  738.                     break;
  739.                 case 3 : // 18 bits = 8 + 8 + 2
  740.                     validateCharacter(MASK_2_BITS, context);
  741.                     context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
  742.                     buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
  743.                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  744.                     break;
  745.                 default:
  746.                     throw new IllegalStateException("Impossible modulus " + context.modulus);
  747.             }
  748.         }
  749.     }

  750.     /**
  751.      * <p>
  752.      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
  753.      * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
  754.      * remaining bytes (if not multiple of 3).
  755.      * </p>
  756.      * <p><strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong></p>
  757.      * <p>
  758.      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
  759.      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
  760.      * </p>
  761.      *
  762.      * @param in
  763.      *            byte[] array of binary data to base64 encode.
  764.      * @param inPos
  765.      *            Position to start reading data from.
  766.      * @param inAvail
  767.      *            Amount of bytes available from input for encoding.
  768.      * @param context
  769.      *            the context to be used
  770.      */
  771.     @Override
  772.     void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
  773.         if (context.eof) {
  774.             return;
  775.         }
  776.         // inAvail < 0 is how we're informed of EOF in the underlying data we're
  777.         // encoding.
  778.         if (inAvail < 0) {
  779.             context.eof = true;
  780.             if (0 == context.modulus && lineLength == 0) {
  781.                 return; // no leftovers to process and not using chunking
  782.             }
  783.             final byte[] buffer = ensureBufferSize(encodeSize, context);
  784.             final int savedPos = context.pos;
  785.             switch (context.modulus) { // 0-2
  786.                 case 0 : // nothing to do here
  787.                     break;
  788.                 case 1 : // 8 bits = 6 + 2
  789.                     // top 6 bits:
  790.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6_BITS];
  791.                     // remaining 2:
  792.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6_BITS];
  793.                     // URL-SAFE skips the padding to further reduce size.
  794.                     if (encodeTable == STANDARD_ENCODE_TABLE) {
  795.                         buffer[context.pos++] = pad;
  796.                         buffer[context.pos++] = pad;
  797.                     }
  798.                     break;

  799.                 case 2 : // 16 bits = 6 + 6 + 4
  800.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6_BITS];
  801.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6_BITS];
  802.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6_BITS];
  803.                     // URL-SAFE skips the padding to further reduce size.
  804.                     if (encodeTable == STANDARD_ENCODE_TABLE) {
  805.                         buffer[context.pos++] = pad;
  806.                     }
  807.                     break;
  808.                 default:
  809.                     throw new IllegalStateException("Impossible modulus " + context.modulus);
  810.             }
  811.             context.currentLinePos += context.pos - savedPos; // keep track of current line position
  812.             // if currentPos == 0 we are at the start of a line, so don't add CRLF
  813.             if (lineLength > 0 && context.currentLinePos > 0) {
  814.                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  815.                 context.pos += lineSeparator.length;
  816.             }
  817.         } else {
  818.             for (int i = 0; i < inAvail; i++) {
  819.                 final byte[] buffer = ensureBufferSize(encodeSize, context);
  820.                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
  821.                 int b = in[inPos++];
  822.                 if (b < 0) {
  823.                     b += 256;
  824.                 }
  825.                 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
  826.                 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
  827.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6_BITS];
  828.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6_BITS];
  829.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6_BITS];
  830.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6_BITS];
  831.                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
  832.                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
  833.                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  834.                         context.pos += lineSeparator.length;
  835.                         context.currentLinePos = 0;
  836.                     }
  837.                 }
  838.             }
  839.         }
  840.     }

  841.     /**
  842.      * Gets the line separator (for testing only).
  843.      *
  844.      * @return the line separator.
  845.      */
  846.     byte[] getLineSeparator() {
  847.         return lineSeparator;
  848.     }

  849.     /**
  850.      * Returns whether or not the {@code octet} is in the Base64 alphabet.
  851.      *
  852.      * @param octet
  853.      *            The value to test
  854.      * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
  855.      */
  856.     @Override
  857.     protected boolean isInAlphabet(final byte octet) {
  858.         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
  859.     }

  860.     /**
  861.      * Returns our current encode mode. True if we're URL-safe, false otherwise.
  862.      *
  863.      * @return true if we're in URL-safe mode, false otherwise.
  864.      * @since 1.4
  865.      */
  866.     public boolean isUrlSafe() {
  867.         return isUrlSafe;
  868.     }

  869.     /**
  870.      * Validates whether decoding the final trailing character is possible in the context
  871.      * of the set of possible base 64 values.
  872.      * <p>
  873.      * The character is valid if the lower bits within the provided mask are zero. This
  874.      * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
  875.      * </p>
  876.      *
  877.      * @param emptyBitsMask The mask of the lower bits that should be empty
  878.      * @param context the context to be used
  879.      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
  880.      */
  881.     private void validateCharacter(final int emptyBitsMask, final Context context) {
  882.         if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
  883.             throw new IllegalArgumentException(
  884.                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
  885.                 "base 64 alphabet but not a possible encoding. " +
  886.                 "Expected the discarded bits from the character to be zero.");
  887.         }
  888.     }

  889.     /**
  890.      * Validates whether decoding allows an entire final trailing character that cannot be
  891.      * used for a complete byte.
  892.      *
  893.      * @throws IllegalArgumentException if strict decoding is enabled
  894.      */
  895.     private void validateTrailingCharacter() {
  896.         if (isStrictDecoding()) {
  897.             throw new IllegalArgumentException(
  898.                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
  899.                 "base 64 alphabet but not a possible encoding. " +
  900.                 "Decoding requires at least two trailing 6-bit characters to create bytes.");
  901.         }
  902.     }

  903. }