001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.math.BigInteger;
021import java.util.Objects;
022
023import org.apache.commons.codec.CodecPolicy;
024
025/**
026 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
027 *
028 * <p>
029 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
030 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
031 * </p>
032 * <p>
033 * The class can be parameterized in the following manner with various constructors:
034 * </p>
035 * <ul>
036 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples
 * of 4 in the encoded data.</li>
039 * <li>Line separator: Default is CRLF ("\r\n")</li>
040 * </ul>
041 * <p>
042 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
043 * </p>
044 * <p>
045 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
046 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
047 * UTF-8, etc).
048 * </p>
049 * <p>
050 * This class is thread-safe.
051 * </p>
052 *
053 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
054 * @since 1.0
055 */
056public class Base64 extends BaseNCodec {
057
058    /**
059     * BASE64 characters are 6 bits in length.
060     * They are formed by taking a block of 3 octets to form a 24-bit string,
061     * which is converted into 4 BASE64 characters.
062     */
063    private static final int BITS_PER_ENCODED_BYTE = 6;
064    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
065    private static final int BYTES_PER_ENCODED_BLOCK = 4;
066
067    /**
068     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
069     * equivalents as specified in Table 1 of RFC 2045.
070     * <p>
071     * Thanks to "commons" project in ws.apache.org for this code.
072     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
073     * </p>
074     */
075    private static final byte[] STANDARD_ENCODE_TABLE = {
076            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
077            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
078            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
079            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
080            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
081    };
082
083    /**
084     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
085     * changed to - and _ to make the encoded Base64 results more URL-SAFE.
086     * This table is only used when the Base64's mode is set to URL-SAFE.
087     */
088    private static final byte[] URL_SAFE_ENCODE_TABLE = {
089            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
090            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
091            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
092            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
093            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
094    };
095
096    /**
097     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
098     * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
099     * alphabet but fall within the bounds of the array are translated to -1.
100     * <p>
101     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
102     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
103     * </p>
104     * <p>
105     * Thanks to "commons" project in ws.apache.org for this code.
106     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
107     * </p>
108     */
109    private static final byte[] DECODE_TABLE = {
110        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
111            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
112            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
113            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
114            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
115            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
116            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
117            -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
118            41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z
119    };
120
121    // The static final fields above are used for the original static byte[] methods on Base64.
122    // The private member fields below are used with the new streaming approach, which requires
123    // some state be preserved between calls of encode() and decode().
124
125    /**
126     * Base64 uses 6-bit fields.
127     */
128    /** Mask used to extract 6 bits, used when encoding */
129    private static final int MASK_6BITS = 0x3f;
130    /** Mask used to extract 4 bits, used when decoding final trailing character. */
131    private static final int MASK_4BITS = 0xf;
132    /** Mask used to extract 2 bits, used when decoding final trailing character. */
133    private static final int MASK_2BITS = 0x3;
134
135    /**
136     * Decodes Base64 data into octets.
137     * <p>
138     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
139     * </p>
140     *
141     * @param base64Data
142     *            Byte array containing Base64 data
143     * @return Array containing decoded data.
144     */
145    public static byte[] decodeBase64(final byte[] base64Data) {
146        return new Base64().decode(base64Data);
147    }
148
149    /**
150     * Decodes a Base64 String into octets.
151     * <p>
152     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
153     * </p>
154     *
155     * @param base64String
156     *            String containing Base64 data
157     * @return Array containing decoded data.
158     * @since 1.4
159     */
160    public static byte[] decodeBase64(final String base64String) {
161        return new Base64().decode(base64String);
162    }
163
164    // Implementation of integer encoding used for crypto
165    /**
166     * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
167     *
168     * @param pArray
169     *            a byte array containing base64 character data
170     * @return A BigInteger
171     * @since 1.4
172     */
173    public static BigInteger decodeInteger(final byte[] pArray) {
174        return new BigInteger(1, decodeBase64(pArray));
175    }
176
177    /**
178     * Encodes binary data using the base64 algorithm but does not chunk the output.
179     *
180     * @param binaryData
181     *            binary data to encode
182     * @return byte[] containing Base64 characters in their UTF-8 representation.
183     */
184    public static byte[] encodeBase64(final byte[] binaryData) {
185        return encodeBase64(binaryData, false);
186    }
187
188    /**
189     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
190     *
191     * @param binaryData
192     *            Array containing binary data to encode.
193     * @param isChunked
194     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
195     * @return Base64-encoded data.
196     * @throws IllegalArgumentException
197     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
198     */
199    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
200        return encodeBase64(binaryData, isChunked, false);
201    }
202
203    /**
204     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
205     *
206     * @param binaryData
207     *            Array containing binary data to encode.
208     * @param isChunked
209     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
210     * @param urlSafe
211     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
212     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
213     * @return Base64-encoded data.
214     * @throws IllegalArgumentException
215     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
216     * @since 1.4
217     */
218    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
219        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
220    }
221
222    /**
223     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
224     *
225     * @param binaryData
226     *            Array containing binary data to encode.
227     * @param isChunked
228     *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
229     * @param urlSafe
230     *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
231     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
232     * @param maxResultSize
233     *            The maximum result size to accept.
234     * @return Base64-encoded data.
235     * @throws IllegalArgumentException
236     *             Thrown when the input array needs an output array bigger than maxResultSize
237     * @since 1.4
238     */
239    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
240                                      final boolean urlSafe, final int maxResultSize) {
241        if (BinaryCodec.isEmpty(binaryData)) {
242            return binaryData;
243        }
244
245        // Create this so can use the super-class method
246        // Also ensures that the same roundings are performed by the ctor and the code
247        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
248        final long len = b64.getEncodedLength(binaryData);
249        if (len > maxResultSize) {
250            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
251                len +
252                ") than the specified maximum size of " +
253                maxResultSize);
254        }
255
256        return b64.encode(binaryData);
257    }
258
259    /**
260     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
261     *
262     * @param binaryData
263     *            binary data to encode
264     * @return Base64 characters chunked in 76 character blocks
265     */
266    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
267        return encodeBase64(binaryData, true);
268    }
269
270    /**
271     * Encodes binary data using the base64 algorithm but does not chunk the output.
272     *
273     * NOTE:  We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
274     * single-line non-chunking (commons-codec-1.5).
275     *
276     * @param binaryData
277     *            binary data to encode
278     * @return String containing Base64 characters.
279     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
280     */
281    public static String encodeBase64String(final byte[] binaryData) {
282        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
283    }
284
285    /**
286     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
287     * url-safe variation emits - and _ instead of + and / characters.
288     * <b>Note: no padding is added.</b>
289     * @param binaryData
290     *            binary data to encode
291     * @return byte[] containing Base64 characters in their UTF-8 representation.
292     * @since 1.4
293     */
294    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
295        return encodeBase64(binaryData, false, true);
296    }
297
298    /**
299     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
300     * url-safe variation emits - and _ instead of + and / characters.
301     * <b>Note: no padding is added.</b>
302     * @param binaryData
303     *            binary data to encode
304     * @return String containing Base64 characters
305     * @since 1.4
306     */
307    public static String encodeBase64URLSafeString(final byte[] binaryData) {
308        return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
309    }
310
311    /**
312     * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
313     *
314     * @param bigInteger
315     *            a BigInteger
316     * @return A byte array containing base64 character data
317     * @throws NullPointerException
318     *             if null is passed in
319     * @since 1.4
320     */
321    public static byte[] encodeInteger(final BigInteger bigInteger) {
322        Objects.requireNonNull(bigInteger, "bigInteger");
323        return encodeBase64(toIntegerBytes(bigInteger), false);
324    }
325
326    /**
327     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
328     * method treats whitespace as valid.
329     *
330     * @param arrayOctet
331     *            byte array to test
332     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
333     *         {@code false}, otherwise
334     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
335     */
336    @Deprecated
337    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
338        return isBase64(arrayOctet);
339    }
340
341    /**
342     * Returns whether or not the {@code octet} is in the base 64 alphabet.
343     *
344     * @param octet
345     *            The value to test
346     * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
347     * @since 1.4
348     */
349    public static boolean isBase64(final byte octet) {
350        return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
351    }
352
353    /**
354     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
355     * method treats whitespace as valid.
356     *
357     * @param arrayOctet
358     *            byte array to test
359     * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
360     *         {@code false}, otherwise
361     * @since 1.5
362     */
363    public static boolean isBase64(final byte[] arrayOctet) {
364        for (final byte element : arrayOctet) {
365            if (!isBase64(element) && !Character.isWhitespace(element)) {
366                return false;
367            }
368        }
369        return true;
370    }
371
372    /**
373     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
374     * method treats whitespace as valid.
375     *
376     * @param base64
377     *            String to test
378     * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
379     *         the String is empty; {@code false}, otherwise
380     *  @since 1.5
381     */
382    public static boolean isBase64(final String base64) {
383        return isBase64(StringUtils.getBytesUtf8(base64));
384    }
385
386    /**
387     * Returns a byte-array representation of a {@code BigInteger} without sign bit.
388     *
389     * @param bigInt
390     *            {@code BigInteger} to be converted
391     * @return a byte array representation of the BigInteger parameter
392     */
393    static byte[] toIntegerBytes(final BigInteger bigInt) {
394        int bitlen = bigInt.bitLength();
395        // round bitlen
396        bitlen = bitlen + 7 >> 3 << 3;
397        final byte[] bigBytes = bigInt.toByteArray();
398
399        if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) {
400            return bigBytes;
401        }
402        // set up params for copying everything but sign bit
403        int startSrc = 0;
404        int len = bigBytes.length;
405
406        // if bigInt is exactly byte-aligned, just skip signbit in copy
407        if (bigInt.bitLength() % 8 == 0) {
408            startSrc = 1;
409            len--;
410        }
411        final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
412        final byte[] resizedBytes = new byte[bitlen / 8];
413        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
414        return resizedBytes;
415    }
416
417    /**
418     * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
419     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
420     * between the two modes.
421     */
422    private final byte[] encodeTable;
423
424    /** Only one decode table currently; keep for consistency with Base32 code. */
425    private final byte[] decodeTable = DECODE_TABLE;
426
427    /**
428     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
429     */
430    private final byte[] lineSeparator;
431
432    /**
433     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
434     * {@code decodeSize = 3 + lineSeparator.length;}
435     */
436    private final int decodeSize;
437
438    /**
439     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
440     * {@code encodeSize = 4 + lineSeparator.length;}
441     */
442    private final int encodeSize;
443
444    /**
445     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
446     * <p>
447     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
448     * </p>
449     *
450     * <p>
451     * When decoding all variants are supported.
452     * </p>
453     */
454    public Base64() {
455        this(0);
456    }
457
458    /**
459     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
460     * <p>
461     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
462     * </p>
463     *
464     * <p>
465     * When decoding all variants are supported.
466     * </p>
467     *
468     * @param urlSafe
469     *            if {@code true}, URL-safe encoding is used. In most cases this should be set to
470     *            {@code false}.
471     * @since 1.4
472     */
473    public Base64(final boolean urlSafe) {
474        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
475    }
476
477    /**
478     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
479     * <p>
480     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
481     * STANDARD_ENCODE_TABLE.
482     * </p>
483     * <p>
484     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
485     * </p>
486     * <p>
487     * When decoding all variants are supported.
488     * </p>
489     *
490     * @param lineLength
491     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
492     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
493     *            decoding.
494     * @since 1.4
495     */
496    public Base64(final int lineLength) {
497        this(lineLength, CHUNK_SEPARATOR);
498    }
499
500    /**
501     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
502     * <p>
503     * When encoding the line length and line separator are given in the constructor, and the encoding table is
504     * STANDARD_ENCODE_TABLE.
505     * </p>
506     * <p>
507     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
508     * </p>
509     * <p>
510     * When decoding all variants are supported.
511     * </p>
512     *
513     * @param lineLength
514     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
515     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
516     *            decoding.
517     * @param lineSeparator
518     *            Each line of encoded data will end with this sequence of bytes.
519     * @throws IllegalArgumentException
520     *             Thrown when the provided lineSeparator included some base64 characters.
521     * @since 1.4
522     */
523    public Base64(final int lineLength, final byte[] lineSeparator) {
524        this(lineLength, lineSeparator, false);
525    }
526
527    /**
528     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
529     * <p>
530     * When encoding the line length and line separator are given in the constructor, and the encoding table is
531     * STANDARD_ENCODE_TABLE.
532     * </p>
533     * <p>
534     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
535     * </p>
536     * <p>
537     * When decoding all variants are supported.
538     * </p>
539     *
540     * @param lineLength
541     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
542     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
543     *            decoding.
544     * @param lineSeparator
545     *            Each line of encoded data will end with this sequence of bytes.
546     * @param urlSafe
547     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
548     *            operations. Decoding seamlessly handles both modes.
549     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
550     * @throws IllegalArgumentException
551     *             Thrown when the {@code lineSeparator} contains Base64 characters.
552     * @since 1.4
553     */
554    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
555        this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT);
556    }
557
558    /**
559     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
560     * <p>
561     * When encoding the line length and line separator are given in the constructor, and the encoding table is
562     * STANDARD_ENCODE_TABLE.
563     * </p>
564     * <p>
565     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
566     * </p>
567     * <p>
568     * When decoding all variants are supported.
569     * </p>
570     *
571     * @param lineLength
572     *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
573     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
574     *            decoding.
575     * @param lineSeparator
576     *            Each line of encoded data will end with this sequence of bytes.
577     * @param urlSafe
578     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
579     *            operations. Decoding seamlessly handles both modes.
580     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
581     * @param decodingPolicy The decoding policy.
582     * @throws IllegalArgumentException
583     *             Thrown when the {@code lineSeparator} contains Base64 characters.
584     * @since 1.15
585     */
586    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe,
587                  final CodecPolicy decodingPolicy) {
588        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
589                lineLength,
590                lineSeparator == null ? 0 : lineSeparator.length,
591                PAD_DEFAULT,
592                decodingPolicy);
593        // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
594        // @see test case Base64Test.testConstructors()
595        if (lineSeparator != null) {
596            if (containsAlphabetOrPad(lineSeparator)) {
597                final String sep = StringUtils.newStringUtf8(lineSeparator);
598                throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
599            }
600            if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE
601                this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
602                this.lineSeparator = lineSeparator.clone();
603            } else {
604                this.encodeSize = BYTES_PER_ENCODED_BLOCK;
605                this.lineSeparator = null;
606            }
607        } else {
608            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
609            this.lineSeparator = null;
610        }
611        this.decodeSize = this.encodeSize - 1;
612        this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
613    }
614
615    // Implementation of the Encoder Interface
616
617    /**
618     * <p>
619     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
620     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
621     * call is not necessary when decoding, but it doesn't hurt, either.
622     * </p>
623     * <p>
624     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
625     * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
626     * garbage-out philosophy: it will not check the provided data for validity.
627     * </p>
628     * <p>
629     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
630     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
631     * </p>
632     *
633     * @param input
634     *            byte[] array of ASCII data to base64 decode.
635     * @param inPos
636     *            Position to start reading data from.
637     * @param inAvail
638     *            Amount of bytes available from input for decoding.
639     * @param context
640     *            the context to be used
641     */
642    @Override
643    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
644        if (context.eof) {
645            return;
646        }
647        if (inAvail < 0) {
648            context.eof = true;
649        }
650        for (int i = 0; i < inAvail; i++) {
651            final byte[] buffer = ensureBufferSize(decodeSize, context);
652            final byte b = input[inPos++];
653            if (b == pad) {
654                // We're done.
655                context.eof = true;
656                break;
657            }
658            if (b >= 0 && b < DECODE_TABLE.length) {
659                final int result = DECODE_TABLE[b];
660                if (result >= 0) {
661                    context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
662                    context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
663                    if (context.modulus == 0) {
664                        buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS);
665                        buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
666                        buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
667                    }
668                }
669            }
670        }
671
672        // Two forms of EOF as far as base64 decoder is concerned: actual
673        // EOF (-1) and first time '=' character is encountered in stream.
674        // This approach makes the '=' padding characters completely optional.
675        if (context.eof && context.modulus != 0) {
676            final byte[] buffer = ensureBufferSize(decodeSize, context);
677
678            // We have some spare bits remaining
679            // Output all whole multiples of 8 bits and ignore the rest
680            switch (context.modulus) {
681//              case 0 : // impossible, as excluded above
682                case 1 : // 6 bits - either ignore entirely, or raise an exception
683                    validateTrailingCharacter();
684                    break;
685                case 2 : // 12 bits = 8 + 4
686                    validateCharacter(MASK_4BITS, context);
687                    context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
688                    buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
689                    break;
690                case 3 : // 18 bits = 8 + 8 + 2
691                    validateCharacter(MASK_2BITS, context);
692                    context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
693                    buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
694                    buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
695                    break;
696                default:
697                    throw new IllegalStateException("Impossible modulus " + context.modulus);
698            }
699        }
700    }
701
702    /**
703     * <p>
704     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
705     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
706     * remaining bytes (if not multiple of 3).
707     * </p>
708     * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
709     * <p>
710     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
711     * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
712     * </p>
713     *
714     * @param in
715     *            byte[] array of binary data to base64 encode.
716     * @param inPos
717     *            Position to start reading data from.
718     * @param inAvail
719     *            Amount of bytes available from input for encoding.
720     * @param context
721     *            the context to be used
722     */
723    @Override
724    void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
725        if (context.eof) {
726            return;
727        }
728        // inAvail < 0 is how we're informed of EOF in the underlying data we're
729        // encoding.
730        if (inAvail < 0) {
731            context.eof = true;
732            if (0 == context.modulus && lineLength == 0) {
733                return; // no leftovers to process and not using chunking
734            }
735            final byte[] buffer = ensureBufferSize(encodeSize, context);
736            final int savedPos = context.pos;
737            switch (context.modulus) { // 0-2
738                case 0 : // nothing to do here
739                    break;
740                case 1 : // 8 bits = 6 + 2
741                    // top 6 bits:
742                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6BITS];
743                    // remaining 2:
744                    buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6BITS];
745                    // URL-SAFE skips the padding to further reduce size.
746                    if (encodeTable == STANDARD_ENCODE_TABLE) {
747                        buffer[context.pos++] = pad;
748                        buffer[context.pos++] = pad;
749                    }
750                    break;
751
752                case 2 : // 16 bits = 6 + 6 + 4
753                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6BITS];
754                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6BITS];
755                    buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6BITS];
756                    // URL-SAFE skips the padding to further reduce size.
757                    if (encodeTable == STANDARD_ENCODE_TABLE) {
758                        buffer[context.pos++] = pad;
759                    }
760                    break;
761                default:
762                    throw new IllegalStateException("Impossible modulus " + context.modulus);
763            }
764            context.currentLinePos += context.pos - savedPos; // keep track of current line position
765            // if currentPos == 0 we are at the start of a line, so don't add CRLF
766            if (lineLength > 0 && context.currentLinePos > 0) {
767                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
768                context.pos += lineSeparator.length;
769            }
770        } else {
771            for (int i = 0; i < inAvail; i++) {
772                final byte[] buffer = ensureBufferSize(encodeSize, context);
773                context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
774                int b = in[inPos++];
775                if (b < 0) {
776                    b += 256;
777                }
778                context.ibitWorkArea = (context.ibitWorkArea << 8) + b; //  BITS_PER_BYTE
779                if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
780                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6BITS];
781                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6BITS];
782                    buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6BITS];
783                    buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
784                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
785                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
786                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
787                        context.pos += lineSeparator.length;
788                        context.currentLinePos = 0;
789                    }
790                }
791            }
792        }
793    }
794
795    /**
796     * Returns whether or not the {@code octet} is in the Base64 alphabet.
797     *
798     * @param octet
799     *            The value to test
800     * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
801     */
802    @Override
803    protected boolean isInAlphabet(final byte octet) {
804        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
805    }
806
807    /**
808     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
809     *
810     * @return true if we're in URL-SAFE mode, false otherwise.
811     * @since 1.4
812     */
813    public boolean isUrlSafe() {
814        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
815    }
816
817    /**
818     * Validates whether decoding the final trailing character is possible in the context
819     * of the set of possible base 64 values.
820     * <p>
821     * The character is valid if the lower bits within the provided mask are zero. This
822     * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
823     * </p>
824     *
825     * @param emptyBitsMask The mask of the lower bits that should be empty
826     * @param context the context to be used
827     *
828     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
829     */
830    private void validateCharacter(final int emptyBitsMask, final Context context) {
831        if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
832            throw new IllegalArgumentException(
833                "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
834                "base 64 alphabet but not a possible encoding. " +
835                "Expected the discarded bits from the character to be zero.");
836        }
837    }
838
839    /**
840     * Validates whether decoding allows an entire final trailing character that cannot be
841     * used for a complete byte.
842     *
843     * @throws IllegalArgumentException if strict decoding is enabled
844     */
845    private void validateTrailingCharacter() {
846        if (isStrictDecoding()) {
847            throw new IllegalArgumentException(
848                "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
849                "base 64 alphabet but not a possible encoding. " +
850                "Decoding requires at least two trailing 6-bit characters to create bytes.");
851        }
852    }
853
854}