001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.math.BigInteger;
021import java.util.Objects;
022
023import org.apache.commons.codec.CodecPolicy;
024
025/**
026 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
027 *
028 * <p>
029 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
030 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
031 * </p>
032 * <p>
033 * The class can be parameterized in the following manner with various constructors:
034 * </p>
035 * <ul>
036 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
 * 4 in the encoded data.</li>
039 * <li>Line separator: Default is CRLF ("\r\n")</li>
040 * </ul>
041 * <p>
042 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
043 * </p>
044 * <p>
045 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
046 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
047 * UTF-8, etc).
048 * </p>
049 * <p>
050 * This class is thread-safe.
051 * </p>
052 *
053 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
054 * @since 1.0
055 */
056public class Base64 extends BaseNCodec {
057
/**
 * BASE64 characters are 6 bits in length.
 * They are formed by taking a block of 3 octets to form a 24-bit string,
 * which is converted into 4 BASE64 characters.
 */
private static final int BITS_PER_ENCODED_BYTE = 6;
/** Number of raw octets that make up one unencoded block (3 octets = 24 bits). */
private static final int BYTES_PER_UNENCODED_BLOCK = 3;
/** Number of BASE64 characters that one unencoded block is converted into. */
private static final int BYTES_PER_ENCODED_BLOCK = 4;
066
/**
 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
 * equivalents as specified in Table 1 of RFC 2045.
 * <p>
 * The table holds 64 entries: 'A'-'Z', 'a'-'z', '0'-'9', then '+' and '/'.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 */
private static final byte[] STANDARD_ENCODE_TABLE = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
082
/**
 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
 * changed to - and _ to make the encoded Base64 results more URL-SAFE.
 * This table is only used when the Base64's mode is set to URL-SAFE.
 * <p>
 * Only the last two entries (indices 62 and 63) differ from the standard table.
 * </p>
 */
private static final byte[] URL_SAFE_ENCODE_TABLE = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
};
095
/**
 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
 * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
 * alphabet but fall within the bounds of the array are translated to -1.
 * <p>
 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
 * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
 * </p>
 * <p>
 * The table stops at 'z' (0x7a), so it has fewer than 128 entries: callers must range-check a byte against
 * {@code DECODE_TABLE.length} (and against negative values) before using it as an index.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 */
private static final byte[] DECODE_TABLE = {
    //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
};
120
121    // The static final fields above are used for the original static byte[] methods on Base64.
122    // The private member fields below are used with the new streaming approach, which requires
123    // some state be preserved between calls of encode() and decode().
124
// Base64 uses 6-bit fields; the masks below isolate the low-order bits of the work area.

/** Mask used to extract 6 bits, used when encoding */
private static final int MASK_6BITS = 0x3f;
/** Mask used to extract 4 bits, used when decoding final trailing character. */
private static final int MASK_4BITS = 0xf;
/** Mask used to extract 2 bits, used when decoding final trailing character. */
private static final int MASK_2BITS = 0x3;
134
/**
 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
 * between the two modes.
 */
private final byte[] encodeTable;

/** Only one decode table currently; keep for consistency with Base32 code. */
private final byte[] decodeTable = DECODE_TABLE;

/**
 * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
 * The constructor stores a private copy of the caller's array, or {@code null} when chunking is disabled.
 */
private final byte[] lineSeparator;

/**
 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
 * Always {@code encodeSize - 1}, i.e. {@code 3 + lineSeparator.length} when chunking and {@code 3} otherwise.
 */
private final int decodeSize;

/**
 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
 * {@code 4 + lineSeparator.length} when chunking is enabled, and {@code 4} when it is not.
 */
private final int encodeSize;
161
162    /**
163     * Decodes Base64 data into octets.
164     * <p>
165     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
166     * </p>
167     *
168     * @param base64Data
169     *            Byte array containing Base64 data
170     * @return Array containing decoded data.
171     */
172    public static byte[] decodeBase64(final byte[] base64Data) {
173        return new Base64().decode(base64Data);
174    }
175
176    /**
177     * Decodes a Base64 String into octets.
178     * <p>
179     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
180     * </p>
181     *
182     * @param base64String
183     *            String containing Base64 data
184     * @return Array containing decoded data.
185     * @since 1.4
186     */
187    public static byte[] decodeBase64(final String base64String) {
188        return new Base64().decode(base64String);
189    }
190
191    // Implementation of integer encoding used for crypto
192    /**
193     * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
194     *
195     * @param pArray
196     *            a byte array containing base64 character data
197     * @return A BigInteger
198     * @since 1.4
199     */
200    public static BigInteger decodeInteger(final byte[] pArray) {
201        return new BigInteger(1, decodeBase64(pArray));
202    }
203
204    /**
205     * Encodes binary data using the base64 algorithm but does not chunk the output.
206     *
207     * @param binaryData
208     *            binary data to encode
209     * @return byte[] containing Base64 characters in their UTF-8 representation.
210     */
211    public static byte[] encodeBase64(final byte[] binaryData) {
212        return encodeBase64(binaryData, false);
213    }
214
215    /**
216     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
217     *
218     * @param binaryData
219     *            Array containing binary data to encode.
220     * @param isChunked
221     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
222     * @return Base64-encoded data.
223     * @throws IllegalArgumentException
224     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
225     */
226    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
227        return encodeBase64(binaryData, isChunked, false);
228    }
229
230    /**
231     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
232     *
233     * @param binaryData
234     *            Array containing binary data to encode.
235     * @param isChunked
236     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
237     * @param urlSafe
238     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
239     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
240     * @return Base64-encoded data.
241     * @throws IllegalArgumentException
242     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
243     * @since 1.4
244     */
245    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
246        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
247    }
248
249    /**
250     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
251     *
252     * @param binaryData
253     *            Array containing binary data to encode.
254     * @param isChunked
255     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
256     * @param urlSafe
257     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
258     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
259     * @param maxResultSize
260     *            The maximum result size to accept.
261     * @return Base64-encoded data.
262     * @throws IllegalArgumentException
263     *             Thrown when the input array needs an output array bigger than maxResultSize
264     * @since 1.4
265     */
266    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
267                                      final boolean urlSafe, final int maxResultSize) {
268        if (BinaryCodec.isEmpty(binaryData)) {
269            return binaryData;
270        }
271
272        // Create this so can use the super-class method
273        // Also ensures that the same roundings are performed by the ctor and the code
274        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
275        final long len = b64.getEncodedLength(binaryData);
276        if (len > maxResultSize) {
277            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
278                len +
279                ") than the specified maximum size of " +
280                maxResultSize);
281        }
282
283        return b64.encode(binaryData);
284    }
285
286    /**
287     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
288     *
289     * @param binaryData
290     *            binary data to encode
291     * @return Base64 characters chunked in 76 character blocks
292     */
293    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
294        return encodeBase64(binaryData, true);
295    }
296
297    /**
298     * Encodes binary data using the base64 algorithm but does not chunk the output.
299     *
300     * NOTE:  We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
301     * single-line non-chunking (commons-codec-1.5).
302     *
303     * @param binaryData
304     *            binary data to encode
305     * @return String containing Base64 characters.
306     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
307     */
308    public static String encodeBase64String(final byte[] binaryData) {
309        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
310    }
311
312    /**
313     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
314     * url-safe variation emits - and _ instead of + and / characters.
315     * <b>Note: no padding is added.</b>
316     * @param binaryData
317     *            binary data to encode
318     * @return byte[] containing Base64 characters in their UTF-8 representation.
319     * @since 1.4
320     */
321    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
322        return encodeBase64(binaryData, false, true);
323    }
324
325    /**
326     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
327     * url-safe variation emits - and _ instead of + and / characters.
328     * <b>Note: no padding is added.</b>
329     * @param binaryData
330     *            binary data to encode
331     * @return String containing Base64 characters
332     * @since 1.4
333     */
334    public static String encodeBase64URLSafeString(final byte[] binaryData) {
335        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
336    }
337
338    /**
339     * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
340     *
341     * @param bigInteger
342     *            a BigInteger
343     * @return A byte array containing base64 character data
344     * @throws NullPointerException
345     *             if null is passed in
346     * @since 1.4
347     */
348    public static byte[] encodeInteger(final BigInteger bigInteger) {
349        Objects.requireNonNull(bigInteger, "bigInteger");
350        return encodeBase64(toIntegerBytes(bigInteger), false);
351    }
352
353    /**
354     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
355     * method treats whitespace as valid.
356     *
357     * @param arrayOctet
358     *            byte array to test
359     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
360     *         {@code false}, otherwise
361     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
362     */
363    @Deprecated
364    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
365        return isBase64(arrayOctet);
366    }
367
368    /**
369     * Returns whether or not the {@code octet} is in the base 64 alphabet.
370     *
371     * @param octet
372     *            The value to test
373     * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
374     * @since 1.4
375     */
376    public static boolean isBase64(final byte octet) {
377        return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
378    }
379
380    /**
381     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
382     * method treats whitespace as valid.
383     *
384     * @param arrayOctet
385     *            byte array to test
386     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
387     *         {@code false}, otherwise
388     * @since 1.5
389     */
390    public static boolean isBase64(final byte[] arrayOctet) {
391        for (final byte element : arrayOctet) {
392            if (!isBase64(element) && !Character.isWhitespace(element)) {
393                return false;
394            }
395        }
396        return true;
397    }
398
399    /**
400     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
401     * method treats whitespace as valid.
402     *
403     * @param base64
404     *            String to test
405     * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
406     *         the String is empty; {@code false}, otherwise
407     *  @since 1.5
408     */
409    public static boolean isBase64(final String base64) {
410        return isBase64(StringUtils.getBytesUtf8(base64));
411    }
412
413    /**
414     * Returns a byte-array representation of a {@code BigInteger} without sign bit.
415     *
416     * @param bigInt
417     *            {@code BigInteger} to be converted
418     * @return a byte array representation of the BigInteger parameter
419     */
420    static byte[] toIntegerBytes(final BigInteger bigInt) {
421        int bitlen = bigInt.bitLength();
422        // round bitlen
423        bitlen = ((bitlen + 7) >> 3) << 3;
424        final byte[] bigBytes = bigInt.toByteArray();
425
426        if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
427            return bigBytes;
428        }
429        // set up params for copying everything but sign bit
430        int startSrc = 0;
431        int len = bigBytes.length;
432
433        // if bigInt is exactly byte-aligned, just skip signbit in copy
434        if ((bigInt.bitLength() % 8) == 0) {
435            startSrc = 1;
436            len--;
437        }
438        final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
439        final byte[] resizedBytes = new byte[bitlen / 8];
440        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
441        return resizedBytes;
442    }
443
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 */
public Base64() {
    // Line length 0 disables chunking.
    this(0);
}
457
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is
 * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe
 *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
 *            {@code false}.
 * @since 1.4
 */
public Base64(final boolean urlSafe) {
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
476
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @since 1.4
 */
public Base64(final int lineLength) {
    // Uses the default chunk separator.
    this(lineLength, CHUNK_SEPARATOR);
}
499
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes.
 * @throws IllegalArgumentException
 *             Thrown when the provided {@code lineSeparator} contains Base64 characters.
 * @since 1.4
 */
public Base64(final int lineLength, final byte[] lineSeparator) {
    // urlSafe = false selects the standard alphabet.
    this(lineLength, lineSeparator, false);
}
526
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is
 * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes.
 * @param urlSafe
 *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
 *            operations. Decoding seamlessly handles both modes.
 *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
 * @throws IllegalArgumentException
 *             Thrown when the {@code lineSeparator} contains Base64 characters.
 * @since 1.4
 */
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
    // Delegates with the default decoding policy.
    this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT);
}
557
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is
 * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, STANDARD_ENCODE_TABLE otherwise.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
 *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
 *            decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes. A {@code null} separator disables
 *            chunking instead of raising an exception.
 * @param urlSafe
 *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
 *            operations. Decoding seamlessly handles both modes.
 *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
 * @param decodingPolicy The decoding policy.
 * @throws IllegalArgumentException
 *             Thrown when the {@code lineSeparator} contains Base64 characters.
 * @since 1.15
 */
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe,
              final CodecPolicy decodingPolicy) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
            lineLength,
            lineSeparator == null ? 0 : lineSeparator.length,
            PAD_DEFAULT,
            decodingPolicy);
    // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
    // @see test case Base64Test.testConstructors()
    if (lineSeparator != null && containsAlphabetOrPad(lineSeparator)) {
        final String sep = StringUtils.newStringUtf8(lineSeparator);
        throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
    }

    // Chunking needs both a separator and a positive line length; a null separator never throws.
    final boolean chunking = lineSeparator != null && lineLength > 0;
    if (chunking) {
        this.lineSeparator = lineSeparator.clone();
        this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
    } else {
        this.lineSeparator = null;
        this.encodeSize = BYTES_PER_ENCODED_BLOCK;
    }
    this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
    this.decodeSize = this.encodeSize - 1;
}
614
615    // Implementation of the Encoder Interface
616
617    /**
618     * <p>
619     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
620     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
621     * call is not necessary when decoding, but it doesn't hurt, either.
622     * </p>
623     * <p>
624     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
625     * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
626     * garbage-out philosophy: it will not check the provided data for validity.
627     * </p>
628     * <p>
629     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
630     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
631     * </p>
632     *
633     * @param input
634     *            byte[] array of ASCII data to base64 decode.
635     * @param inPos
636     *            Position to start reading data from.
637     * @param inAvail
638     *            Amount of bytes available from input for decoding.
639     * @param context
640     *            the context to be used
641     */
642    @Override
643    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
644        if (context.eof) {
645            return;
646        }
647        if (inAvail < 0) {
648            context.eof = true;
649        }
650        for (int i = 0; i < inAvail; i++) {
651            final byte[] buffer = ensureBufferSize(decodeSize, context);
652            final byte b = input[inPos++];
653            if (b == pad) {
654                // We're done.
655                context.eof = true;
656                break;
657            }
658            if (b >= 0 && b < DECODE_TABLE.length) {
659                final int result = DECODE_TABLE[b];
660                if (result >= 0) {
661                    context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
662                    context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
663                    if (context.modulus == 0) {
664                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
665                        buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
666                        buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
667                    }
668                }
669            }
670        }
671
672        // Two forms of EOF as far as base64 decoder is concerned: actual
673        // EOF (-1) and first time '=' character is encountered in stream.
674        // This approach makes the '=' padding characters completely optional.
675        if (context.eof && context.modulus != 0) {
676            final byte[] buffer = ensureBufferSize(decodeSize, context);
677
678            // We have some spare bits remaining
679            // Output all whole multiples of 8 bits and ignore the rest
680            switch (context.modulus) {
681//              case 0 : // impossible, as excluded above
682                case 1 : // 6 bits - either ignore entirely, or raise an exception
683                    validateTrailingCharacter();
684                    break;
685                case 2 : // 12 bits = 8 + 4
686                    validateCharacter(MASK_4BITS, context);
687                    context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
688                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
689                    break;
690                case 3 : // 18 bits = 8 + 8 + 2
691                    validateCharacter(MASK_2BITS, context);
692                    context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
693                    buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
694                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
695                    break;
696                default:
697                    throw new IllegalStateException("Impossible modulus " + context.modulus);
698            }
699        }
700    }
701
702    /**
703     * <p>
704     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
705     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
706     * remaining bytes (if not multiple of 3).
707     * </p>
708     * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
709     * <p>
710     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
711     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
712     * </p>
713     *
714     * @param in
715     *            byte[] array of binary data to base64 encode.
716     * @param inPos
717     *            Position to start reading data from.
718     * @param inAvail
719     *            Amount of bytes available from input for encoding.
720     * @param context
721     *            the context to be used
722     */
723    @Override
724    void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
725        if (context.eof) {
726            return;
727        }
728        // inAvail < 0 is how we're informed of EOF in the underlying data we're
729        // encoding.
730        if (inAvail < 0) {
731            context.eof = true;
732            if (0 == context.modulus && lineLength == 0) {
733                return; // no leftovers to process and not using chunking
734            }
735            final byte[] buffer = ensureBufferSize(encodeSize, context);
736            final int savedPos = context.pos;
737            switch (context.modulus) { // 0-2
738                case 0 : // nothing to do here
739                    break;
740                case 1 : // 8 bits = 6 + 2
741                    // top 6 bits:
742                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
743                    // remaining 2:
744                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
745                    // URL-SAFE skips the padding to further reduce size.
746                    if (encodeTable == STANDARD_ENCODE_TABLE) {
747                        buffer[context.pos++] = pad;
748                        buffer[context.pos++] = pad;
749                    }
750                    break;
751
752                case 2 : // 16 bits = 6 + 6 + 4
753                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
754                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
755                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
756                    // URL-SAFE skips the padding to further reduce size.
757                    if (encodeTable == STANDARD_ENCODE_TABLE) {
758                        buffer[context.pos++] = pad;
759                    }
760                    break;
761                default:
762                    throw new IllegalStateException("Impossible modulus " + context.modulus);
763            }
764            context.currentLinePos += context.pos - savedPos; // keep track of current line position
765            // if currentPos == 0 we are at the start of a line, so don't add CRLF
766            if (lineLength > 0 && context.currentLinePos > 0) {
767                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
768                context.pos += lineSeparator.length;
769            }
770        } else {
771            for (int i = 0; i < inAvail; i++) {
772                final byte[] buffer = ensureBufferSize(encodeSize, context);
773                context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
774                int b = in[inPos++];
775                if (b < 0) {
776                    b += 256;
777                }
778                context.ibitWorkArea = (context.ibitWorkArea << 8) + b; //  BITS_PER_BYTE
779                if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
780                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
781                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
782                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
783                    buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
784                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
785                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
786                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
787                        context.pos += lineSeparator.length;
788                        context.currentLinePos = 0;
789                    }
790                }
791            }
792        }
793    }
794
795    /**
796     * Returns whether or not the {@code octet} is in the Base64 alphabet.
797     *
798     * @param octet
799     *            The value to test
800     * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
801     */
802    @Override
803    protected boolean isInAlphabet(final byte octet) {
804        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
805    }
806
807    /**
808     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
809     *
810     * @return true if we're in URL-SAFE mode, false otherwise.
811     * @since 1.4
812     */
813    public boolean isUrlSafe() {
814        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
815    }
816
817    /**
818     * Validates whether decoding the final trailing character is possible in the context
819     * of the set of possible base 64 values.
820     * <p>
821     * The character is valid if the lower bits within the provided mask are zero. This
822     * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
823     * </p>
824     *
825     * @param emptyBitsMask The mask of the lower bits that should be empty
826     * @param context the context to be used
827     *
828     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
829     */
830    private void validateCharacter(final int emptyBitsMask, final Context context) {
831        if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
832            throw new IllegalArgumentException(
833                "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
834                "base 64 alphabet but not a possible encoding. " +
835                "Expected the discarded bits from the character to be zero.");
836        }
837    }
838
839    /**
840     * Validates whether decoding allows an entire final trailing character that cannot be
841     * used for a complete byte.
842     *
843     * @throws IllegalArgumentException if strict decoding is enabled
844     */
845    private void validateTrailingCharacter() {
846        if (isStrictDecoding()) {
847            throw new IllegalArgumentException(
848                "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
849                "base 64 alphabet but not a possible encoding. " +
850                "Decoding requires at least two trailing 6-bit characters to create bytes.");
851        }
852    }
853
854}