Base64.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.math.BigInteger;
  19. import java.util.Arrays;
  20. import java.util.Objects;

  21. import org.apache.commons.codec.CodecPolicy;

  22. /**
  23.  * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
  24.  *
  25.  * <p>
  26.  * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
  27.  * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
  28.  * </p>
  29.  * <p>
  30.  * The class can be parameterized in the following manner with various constructors:
  31.  * </p>
  32.  * <ul>
  33.  * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
 * 4 in the encoded data.</li>
  36.  * <li>Line separator: Default is CRLF ("\r\n")</li>
  37.  * </ul>
  38.  * <p>
  39.  * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
  40.  * </p>
  41.  * <p>
  42.  * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
  43.  * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
  44.  * UTF-8, etc).
  45.  * </p>
  46.  * <p>
  47.  * This class is thread-safe.
  48.  * </p>
  49.  * <p>
  50.  * You can configure instances with the {@link Builder}.
  51.  * </p>
  52.  * <pre>
  53.  * Base64 base64 = Base64.builder()
  54.  *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient, null resets to default
  55.  *   .setEncodeTable(customEncodeTable)         // default is built in, null resets to default
  56.  *   .setLineLength(0)                          // default is none
  57.  *   .setLineSeparator('\r', '\n')              // default is CR LF, null resets to default
  58.  *   .setPadding('=')                           // default is =
  59.  *   .setUrlSafe(false)                         // default is false
  60.  *   .get()
  61.  * </pre>
  62.  *
  63.  * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
  64.  * @since 1.0
  65.  */
  66. public class Base64 extends BaseNCodec {

  67.     /**
  68.      * Builds {@link Base64} instances.
  69.      *
  70.      * @since 1.17.0
  71.      */
  72.     public static class Builder extends AbstractBuilder<Base64, Builder> {

  73.         /**
  74.          * Constructs a new instance.
  75.          */
  76.         public Builder() {
  77.             super(STANDARD_ENCODE_TABLE);
  78.         }

  79.         @Override
  80.         public Base64 get() {
  81.             return new Base64(getLineLength(), getLineSeparator(), getPadding(), getEncodeTable(), getDecodingPolicy());
  82.         }

  83.         /**
  84.          * Sets the URL-safe encoding policy.
  85.          *
  86.          * @param urlSafe URL-safe encoding policy, null resets to the default.
  87.          * @return {@code this} instance.
  88.          */
  89.         public Builder setUrlSafe(final boolean urlSafe) {
  90.             return setEncodeTable(toUrlSafeEncodeTable(urlSafe));
  91.         }

  92.     }

    /**
     * BASE64 characters are 6 bits in length.
     * They are formed by taking a block of 3 octets to form a 24-bit string,
     * which is converted into 4 BASE64 characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 6;
    /** Number of unencoded octets in one block; passed to the superclass constructor. */
    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
    /** Number of encoded characters in one block; passed to the superclass constructor and the base of {@code encodeSize}. */
    private static final int BYTES_PER_ENCODED_BLOCK = 4;
    /** Required length of an encode table; the constructor rejects tables of any other length. */
    private static final int ALPHABET_LENGTH = 64;
    // NOTE(review): presumably the size of a decode table calculated for a custom encode table - confirm against its use.
    private static final int DECODING_TABLE_LENGTH = 256;

    /**
     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
     * equivalents as specified in Table 1 of RFC 2045.
     * <p>
     * The table holds 64 entries: 'A'-'Z', 'a'-'z', '0'-'9', then '+' and '/'.
     * </p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code.
     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    private static final byte[] STANDARD_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };

    /**
     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
     * changed to - and _ to make the encoded Base64 results more URL-SAFE.
     * This table is only used when the Base64's mode is set to URL-SAFE.
     * <p>
     * Only the last two of the 64 entries differ from the standard table.
     * </p>
     */
    private static final byte[] URL_SAFE_ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
     * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
     * alphabet but fall within the bounds of the array are translated to -1.
     * <p>
     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
     * </p>
     * <p>
     * The table stops at 'z' (0x7a), so it has 123 entries rather than 128 or 256; callers must bounds-check the index
     * before a lookup, as {@link #isBase64(byte)} does.
     * </p>
     * <p>
     * Thanks to "commons" project in ws.apache.org for this code.
     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
     * </p>
     */
    private static final byte[] DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
    };

    /**
     * Mask used to extract 6 bits, used when encoding. Base64 uses 6-bit fields.
     */
    private static final int MASK_6BITS = 0x3f;

    // The static final fields above are used for the original static byte[] methods on Base64.
    // The private member fields below are used with the new streaming approach, which requires
    // some state be preserved between calls of encode() and decode().

    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final int MASK_4BITS = 0xf;
    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final int MASK_2BITS = 0x3;

    /**
     * Creates a new {@link Builder}, the entry point for configuring a {@link Base64} instance.
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Decodes Base64 data into octets using a new default-constructed {@link Base64}.
     * <p>
     * <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
     * </p>
     *
     * @param base64Data
     *            Byte array containing Base64 data
     * @return Array containing decoded data.
     */
    public static byte[] decodeBase64(final byte[] base64Data) {
        return new Base64().decode(base64Data);
    }

    /**
     * Decodes a Base64 String into octets using a new default-constructed {@link Base64}.
     * <p>
     * <strong>Note:</strong> this method seamlessly handles data encoded in URL-safe or normal mode.
     * </p>
     *
     * @param base64String
     *            String containing Base64 data
     * @return Array containing decoded data.
     * @since 1.4
     */
    public static byte[] decodeBase64(final String base64String) {
        return new Base64().decode(base64String);
    }

    /**
     * Decodes a Base64-encoded integer according to crypto standards such as W3C's XML-Signature.
     * <p>
     * The decoded octets are interpreted as the magnitude of a positive number, so the result is never negative.
     * </p>
     *
     * @param pArray
     *            a byte array containing base64 character data
     * @return A BigInteger
     * @since 1.4
     */
    public static BigInteger decodeInteger(final byte[] pArray) {
        // Signum 1: treat the decoded bytes as an unsigned big-endian magnitude.
        return new BigInteger(1, decodeBase64(pArray));
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean)} with chunking disabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     */
    public static byte[] encodeBase64(final byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean)} with URL-safe mode disabled.
     * </p>
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
        return encodeBase64(binaryData, isChunked, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean, int)} with a maximum result size of
     * {@link Integer#MAX_VALUE}.
     * </p>
     *
     * @param binaryData
     *            Array containing binary data to encode.
     * @param isChunked
     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
     * @param urlSafe
     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
     *            <strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong>
     * @return Base64-encoded data.
     * @throws IllegalArgumentException
     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     * @since 1.4
     */
    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
    }

  255.     /**
  256.      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
  257.      *
  258.      * @param binaryData
  259.      *            Array containing binary data to encode.
  260.      * @param isChunked
  261.      *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
  262.      * @param urlSafe
  263.      *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
  264.      *            <strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong>
  265.      * @param maxResultSize
  266.      *            The maximum result size to accept.
  267.      * @return Base64-encoded data.
  268.      * @throws IllegalArgumentException
  269.      *             Thrown when the input array needs an output array bigger than maxResultSize
  270.      * @since 1.4
  271.      */
  272.     public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
  273.                                       final boolean urlSafe, final int maxResultSize) {
  274.         if (BinaryCodec.isEmpty(binaryData)) {
  275.             return binaryData;
  276.         }
  277.         // Create this so can use the super-class method
  278.         // Also ensures that the same roundings are performed by the ctor and the code
  279.         final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
  280.         final long len = b64.getEncodedLength(binaryData);
  281.         if (len > maxResultSize) {
  282.             throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
  283.                 len +
  284.                 ") than the specified maximum size of " +
  285.                 maxResultSize);
  286.         }
  287.         return b64.encode(binaryData);
  288.     }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks.
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean)} with chunking enabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Encodes binary data using the base64 algorithm but does not chunk the output, and returns the result as a
     * String built from the encoded bytes with {@code StringUtils.newStringUsAscii}.
     * <p>
     * NOTE: We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
     * single-line non-chunking (commons-codec-1.5).
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return String containing Base64 characters.
     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
     */
    public static String encodeBase64String(final byte[] binaryData) {
        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
    }

    /**
     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
     * url-safe variation emits - and _ instead of + and / characters.
     * <strong>Note: No padding is added.</strong>
     * <p>
     * Delegates to {@link #encodeBase64(byte[], boolean, boolean)} with chunking disabled and URL-safe mode enabled.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return byte[] containing Base64 characters in their UTF-8 representation.
     * @since 1.4
     */
    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
        return encodeBase64(binaryData, false, true);
    }

    /**
     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
     * url-safe variation emits - and _ instead of + and / characters.
     * <strong>Note: No padding is added.</strong>
     * <p>
     * The encoded bytes are turned into a String with {@code StringUtils.newStringUsAscii}.
     * </p>
     *
     * @param binaryData
     *            binary data to encode
     * @return String containing Base64 characters
     * @since 1.4
     */
    public static String encodeBase64URLSafeString(final byte[] binaryData) {
        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
    }

  337.     /**
  338.      * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
  339.      *
  340.      * @param bigInteger
  341.      *            a BigInteger
  342.      * @return A byte array containing base64 character data
  343.      * @throws NullPointerException
  344.      *             if null is passed in
  345.      * @since 1.4
  346.      */
  347.     public static byte[] encodeInteger(final BigInteger bigInteger) {
  348.         Objects.requireNonNull(bigInteger, "bigInteger");
  349.         return encodeBase64(toIntegerBytes(bigInteger), false);
  350.     }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     * <p>
     * This method only forwards to {@link #isBase64(byte[])}.
     * </p>
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
     */
    @Deprecated
    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
        return isBase64(arrayOctet);
    }

  365.     /**
  366.      * Returns whether or not the {@code octet} is in the base 64 alphabet.
  367.      *
  368.      * @param octet
  369.      *            The value to test
  370.      * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
  371.      * @since 1.4
  372.      */
  373.     public static boolean isBase64(final byte octet) {
  374.         return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
  375.     }

  376.     /**
  377.      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
  378.      * method treats whitespace as valid.
  379.      *
  380.      * @param arrayOctet
  381.      *            byte array to test
  382.      * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
  383.      *         {@code false}, otherwise
  384.      * @since 1.5
  385.      */
  386.     public static boolean isBase64(final byte[] arrayOctet) {
  387.         for (final byte element : arrayOctet) {
  388.             if (!isBase64(element) && !Character.isWhitespace(element)) {
  389.                 return false;
  390.             }
  391.         }
  392.         return true;
  393.     }

    /**
     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
     * method treats whitespace as valid.
     * <p>
     * The String is converted to bytes with {@code StringUtils.getBytesUtf8} and checked by {@link #isBase64(byte[])}.
     * </p>
     *
     * @param base64
     *            String to test
     * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @since 1.5
     */
    public static boolean isBase64(final String base64) {
        return isBase64(StringUtils.getBytesUtf8(base64));
    }

  407.     /**
  408.      * Returns a byte-array representation of a {@code BigInteger} without sign bit.
  409.      *
  410.      * @param bigInt
  411.      *            {@code BigInteger} to be converted
  412.      * @return a byte array representation of the BigInteger parameter
  413.      */
  414.     static byte[] toIntegerBytes(final BigInteger bigInt) {
  415.         int bitlen = bigInt.bitLength();
  416.         // round bitlen
  417.         bitlen = bitlen + 7 >> 3 << 3;
  418.         final byte[] bigBytes = bigInt.toByteArray();

  419.         if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) {
  420.             return bigBytes;
  421.         }
  422.         // set up params for copying everything but sign bit
  423.         int startSrc = 0;
  424.         int len = bigBytes.length;

  425.         // if bigInt is exactly byte-aligned, just skip signbit in copy
  426.         if (bigInt.bitLength() % 8 == 0) {
  427.             startSrc = 1;
  428.             len--;
  429.         }
  430.         final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
  431.         final byte[] resizedBytes = new byte[bitlen / 8];
  432.         System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
  433.         return resizedBytes;
  434.     }

  435.     private static byte[] toUrlSafeEncodeTable(final boolean urlSafe) {
  436.         return urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
  437.     }

    /**
     * Encode table to use: either STANDARD or URL_SAFE or custom.
     * Note: the DECODE_TABLE above remains static because it is able
     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
     * between the two modes.
     */
    private final byte[] encodeTable;

    /**
     * Decode table to use: the shared static DECODE_TABLE when the encode table is one of the two built-in tables,
     * otherwise a table calculated from the custom encode table.
     */
    private final byte[] decodeTable;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0; {@code null} otherwise.
     */
    private final byte[] lineSeparator;

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
     * {@code encodeSize = 4 + lineSeparator.length;} when a line separator is in use, {@code 4} otherwise.
     */
    private final int encodeSize;

    /**
     * {@code true} when the encode table given to the constructor is the built-in URL_SAFE_ENCODE_TABLE instance.
     */
    private final boolean isUrlSafe;

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     */
    public Base64() {
        // A line length of 0 disables chunking.
        this(0);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length is 76 ({@code MIME_CHUNK_SIZE}), the line separator is CRLF, and the encoding table
     * is URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param urlSafe
     *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
     *            {@code false}.
     * @since 1.4
     */
    public Base64(final boolean urlSafe) {
        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @since 1.4
     */
    public Base64(final int lineLength) {
        // CHUNK_SEPARATOR is the default CRLF line separator.
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException
     *             Thrown when the provided {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator) {
        // URL-safe mode is off for this constructor.
        this(lineLength, lineSeparator, false);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise. The default padding
     * byte and the default decoding policy are used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <strong>Note: No padding is added when using the URL-safe alphabet.</strong>
     * @throws IllegalArgumentException
     *             Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
        this(lineLength, lineSeparator, PAD_DEFAULT, toUrlSafeEncodeTable(urlSafe), DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode, with an explicit
     * decoding policy.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is
     * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise. The default padding
     * byte is used.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength
     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
     *            decoding.
     * @param lineSeparator
     *            Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe
     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
     *            operations. Decoding seamlessly handles both modes.
     *            <strong>Note: No padding is added when using the URL-safe alphabet.</strong>
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException
     *             Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.15
     */
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, final CodecPolicy decodingPolicy) {
        this(lineLength, lineSeparator, PAD_DEFAULT, toUrlSafeEncodeTable(urlSafe), decodingPolicy);
    }

  597.     /**
  598.      * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
  599.      * <p>
  600.      * When encoding the line length and line separator are given in the constructor, and the encoding table is STANDARD_ENCODE_TABLE.
  601.      * </p>
  602.      * <p>
  603.      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
  604.      * </p>
  605.      * <p>
  606.      * When decoding all variants are supported.
  607.      * </p>
  608.      *
  609.      * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If lineLength &lt;= 0,
  610.      *                       then the output will not be divided into lines (chunks). Ignored when decoding.
  611.      * @param lineSeparator  Each line of encoded data will end with this sequence of bytes; the constructor makes a defensive copy. May be null.
  612.      * @param padding        padding byte.
  613.      * @param encodeTable    The manual encodeTable - a byte array of 64 chars.
  614.      * @param decodingPolicy The decoding policy.
  615.      * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters.
  616.      */
  617.     private Base64(final int lineLength, final byte[] lineSeparator, final byte padding, final byte[] encodeTable, final CodecPolicy decodingPolicy) {
  618.         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, toLength(lineSeparator), padding, decodingPolicy);
  619.         Objects.requireNonNull(encodeTable, "encodeTable");
  620.         if (encodeTable.length != ALPHABET_LENGTH) {
  621.             throw new IllegalArgumentException("encodeTable must have exactly 64 entries.");
  622.         }
  623.         this.isUrlSafe = encodeTable == URL_SAFE_ENCODE_TABLE;
  624.         if (encodeTable == STANDARD_ENCODE_TABLE || this.isUrlSafe) {
  625.             decodeTable = DECODE_TABLE;
  626.             // No need of a defensive copy of an internal table.
  627.             this.encodeTable = encodeTable;
  628.         } else {
  629.             this.encodeTable = encodeTable.clone();
  630.             this.decodeTable = calculateDecodeTable(this.encodeTable);
  631.         }
  632.         // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
  633.         // @see test case Base64Test.testConstructors()
  634.         if (lineSeparator != null) {
  635.             final byte[] lineSeparatorCopy = lineSeparator.clone();
  636.             if (containsAlphabetOrPad(lineSeparatorCopy)) {
  637.                 final String sep = StringUtils.newStringUtf8(lineSeparatorCopy);
  638.                 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
  639.             }
  640.             if (lineLength > 0) { // null line-sep forces no chunking rather than throwing IAE
  641.                 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorCopy.length;
  642.                 this.lineSeparator = lineSeparatorCopy;
  643.             } else {
  644.                 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
  645.                 this.lineSeparator = null;
  646.             }
  647.         } else {
  648.             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
  649.             this.lineSeparator = null;
  650.         }
  651.     }

  652.     /**
  653.      * Calculates a decode table for a given encode table.
  654.      *
  655.      * @param encodeTable that is used to determine decode lookup table
  656.      * @return decodeTable
  657.      */
  658.     private byte[] calculateDecodeTable(final byte[] encodeTable) {
  659.         final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH];
  660.         Arrays.fill(decodeTable, (byte) -1);
  661.         for (int i = 0; i < encodeTable.length; i++) {
  662.             decodeTable[encodeTable[i]] = (byte) i;
  663.         }
  664.         return decodeTable;
  665.     }

  666.     /**
  667.      * <p>
  668.      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
  669.      * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
  670.      * call is not necessary when decoding, but it doesn't hurt, either.
  671.      * </p>
  672.      * <p>
  673.      * Ignores all non-base64 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are
  674.      * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
  675.      * garbage-out philosophy: it will not check the provided data for validity.
  676.      * </p>
  677.      * <p>
  678.      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
  679.      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
  680.      * </p>
  681.      *
  682.      * @param input
  683.      *            byte[] array of ASCII data to base64 decode.
  684.      * @param inPos
  685.      *            Position to start reading data from.
  686.      * @param inAvail
  687.      *            Amount of bytes available from input for decoding.
  688.      * @param context
  689.      *            the context to be used
  690.      */
  691.     @Override
  692.     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
  693.         if (context.eof) {
  694.             return;
  695.         }
  696.         if (inAvail < 0) {
  697.             context.eof = true;
  698.         }
  699.         final int decodeSize = this.encodeSize - 1;
  700.         for (int i = 0; i < inAvail; i++) {
  701.             final byte[] buffer = ensureBufferSize(decodeSize, context);
  702.             final byte b = input[inPos++];
  703.             if (b == pad) {
  704.                 // We're done.
  705.                 context.eof = true;
  706.                 break;
  707.             }
  708.             if (b >= 0 && b < decodeTable.length) {
  709.                 final int result = decodeTable[b];
  710.                 if (result >= 0) {
  711.                     context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
  712.                     context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
  713.                     if (context.modulus == 0) {
  714.                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS);
  715.                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
  716.                         buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  717.                     }
  718.                 }
  719.             }
  720.         }

  721.         // Two forms of EOF as far as base64 decoder is concerned: actual
  722.         // EOF (-1) and first time '=' character is encountered in stream.
  723.         // This approach makes the '=' padding characters completely optional.
  724.         if (context.eof && context.modulus != 0) {
  725.             final byte[] buffer = ensureBufferSize(decodeSize, context);

  726.             // We have some spare bits remaining
  727.             // Output all whole multiples of 8 bits and ignore the rest
  728.             switch (context.modulus) {
  729. //              case 0 : // impossible, as excluded above
  730.                 case 1 : // 6 bits - either ignore entirely, or raise an exception
  731.                     validateTrailingCharacter();
  732.                     break;
  733.                 case 2 : // 12 bits = 8 + 4
  734.                     validateCharacter(MASK_4BITS, context);
  735.                     context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
  736.                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  737.                     break;
  738.                 case 3 : // 18 bits = 8 + 8 + 2
  739.                     validateCharacter(MASK_2BITS, context);
  740.                     context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
  741.                     buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
  742.                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
  743.                     break;
  744.                 default:
  745.                     throw new IllegalStateException("Impossible modulus " + context.modulus);
  746.             }
  747.         }
  748.     }

  749.     /**
  750.      * <p>
  751.      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
  752.      * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
  753.      * remaining bytes (if not multiple of 3).
  754.      * </p>
  755.      * <p><strong>Note: No padding is added when encoding using the URL-safe alphabet.</strong></p>
  756.      * <p>
  757.      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
  758.      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
  759.      * </p>
  760.      *
  761.      * @param in
  762.      *            byte[] array of binary data to base64 encode.
  763.      * @param inPos
  764.      *            Position to start reading data from.
  765.      * @param inAvail
  766.      *            Amount of bytes available from input for encoding.
  767.      * @param context
  768.      *            the context to be used
  769.      */
  770.     @Override
  771.     void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
  772.         if (context.eof) {
  773.             return;
  774.         }
  775.         // inAvail < 0 is how we're informed of EOF in the underlying data we're
  776.         // encoding.
  777.         if (inAvail < 0) {
  778.             context.eof = true;
  779.             if (0 == context.modulus && lineLength == 0) {
  780.                 return; // no leftovers to process and not using chunking
  781.             }
  782.             final byte[] buffer = ensureBufferSize(encodeSize, context);
  783.             final int savedPos = context.pos;
  784.             switch (context.modulus) { // 0-2
  785.                 case 0 : // nothing to do here
  786.                     break;
  787.                 case 1 : // 8 bits = 6 + 2
  788.                     // top 6 bits:
  789.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6BITS];
  790.                     // remaining 2:
  791.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6BITS];
  792.                     // URL-SAFE skips the padding to further reduce size.
  793.                     if (encodeTable == STANDARD_ENCODE_TABLE) {
  794.                         buffer[context.pos++] = pad;
  795.                         buffer[context.pos++] = pad;
  796.                     }
  797.                     break;

  798.                 case 2 : // 16 bits = 6 + 6 + 4
  799.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6BITS];
  800.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6BITS];
  801.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6BITS];
  802.                     // URL-SAFE skips the padding to further reduce size.
  803.                     if (encodeTable == STANDARD_ENCODE_TABLE) {
  804.                         buffer[context.pos++] = pad;
  805.                     }
  806.                     break;
  807.                 default:
  808.                     throw new IllegalStateException("Impossible modulus " + context.modulus);
  809.             }
  810.             context.currentLinePos += context.pos - savedPos; // keep track of current line position
  811.             // if currentPos == 0 we are at the start of a line, so don't add CRLF
  812.             if (lineLength > 0 && context.currentLinePos > 0) {
  813.                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  814.                 context.pos += lineSeparator.length;
  815.             }
  816.         } else {
  817.             for (int i = 0; i < inAvail; i++) {
  818.                 final byte[] buffer = ensureBufferSize(encodeSize, context);
  819.                 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
  820.                 int b = in[inPos++];
  821.                 if (b < 0) {
  822.                     b += 256;
  823.                 }
  824.                 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
  825.                 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
  826.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6BITS];
  827.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6BITS];
  828.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6BITS];
  829.                     buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
  830.                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
  831.                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
  832.                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
  833.                         context.pos += lineSeparator.length;
  834.                         context.currentLinePos = 0;
  835.                     }
  836.                 }
  837.             }
  838.         }
  839.     }

  840.     /**
  841.      * Gets the line separator (for testing only).
  842.      *
  843.      * @return the line separator.
  844.      */
  845.     byte[] getLineSeparator() {
  846.         return lineSeparator;
  847.     }

  848.     /**
  849.      * Returns whether or not the {@code octet} is in the Base64 alphabet.
  850.      *
  851.      * @param octet
  852.      *            The value to test
  853.      * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
  854.      */
  855.     @Override
  856.     protected boolean isInAlphabet(final byte octet) {
  857.         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
  858.     }

  859.     /**
  860.      * Returns our current encode mode. True if we're URL-safe, false otherwise.
  861.      *
  862.      * @return true if we're in URL-safe mode, false otherwise.
  863.      * @since 1.4
  864.      */
  865.     public boolean isUrlSafe() {
  866.         return isUrlSafe;
  867.     }

  868.     /**
  869.      * Validates whether decoding the final trailing character is possible in the context
  870.      * of the set of possible base 64 values.
  871.      * <p>
  872.      * The character is valid if the lower bits within the provided mask are zero. This
  873.      * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
  874.      * </p>
  875.      *
  876.      * @param emptyBitsMask The mask of the lower bits that should be empty
  877.      * @param context the context to be used
  878.      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
  879.      */
  880.     private void validateCharacter(final int emptyBitsMask, final Context context) {
  881.         if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
  882.             throw new IllegalArgumentException(
  883.                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
  884.                 "base 64 alphabet but not a possible encoding. " +
  885.                 "Expected the discarded bits from the character to be zero.");
  886.         }
  887.     }

  888.     /**
  889.      * Validates whether decoding allows an entire final trailing character that cannot be
  890.      * used for a complete byte.
  891.      *
  892.      * @throws IllegalArgumentException if strict decoding is enabled
  893.      */
  894.     private void validateTrailingCharacter() {
  895.         if (isStrictDecoding()) {
  896.             throw new IllegalArgumentException(
  897.                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
  898.                 "base 64 alphabet but not a possible encoding. " +
  899.                 "Decoding requires at least two trailing 6-bit characters to create bytes.");
  900.         }
  901.     }

  902. }