View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.math.BigInteger;
21  import java.util.Objects;
22  
23  import org.apache.commons.codec.CodecPolicy;
24  
25  /**
26   * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
27   *
28   * <p>
29   * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
30   * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
31   * </p>
32   * <p>
33   * The class can be parameterized in the following manner with various constructors:
34   * </p>
35   * <ul>
36   * <li>URL-safe mode: Default off.</li>
37   * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
38   * 4 in the encoded data.</li>
39   * <li>Line separator: Default is CRLF ("\r\n")</li>
40   * </ul>
41   * <p>
42   * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
43   * </p>
44   * <p>
45   * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
46   * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
47   * UTF-8, etc).
48   * </p>
49   * <p>
50   * This class is thread-safe.
51   * </p>
52   *
53   * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
54   * @since 1.0
55   */
56  public class Base64 extends BaseNCodec {
57  
58      /**
59       * Number of payload bits carried by one encoded BASE64 character.
60       * A block of 3 octets forms a 24-bit string,
61       * which is converted into 4 BASE64 characters of 6 bits each.
62       */
63      private static final int BITS_PER_ENCODED_BYTE = 6;
64      private static final int BYTES_PER_UNENCODED_BLOCK = 3; // octets in one full unencoded block
65      private static final int BYTES_PER_ENCODED_BLOCK = 4; // characters in one full encoded block
66  
67      /**
68       * This array is a lookup table that translates 6-bit positive integer index values (0-63) into their "Base64
69       * Alphabet" byte equivalents as specified in Table 1 of RFC 2045; indices 62 and 63 map to '+' and '/'.
70       * <p>
71       * Thanks to "commons" project in ws.apache.org for this code.
72       * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
73       * </p>
74       */
75      private static final byte[] STANDARD_ENCODE_TABLE = {
76              'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
77              'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
78              'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
79              'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
80              '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
81      };
82  
83      /**
84       * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / (indices 62 and 63)
85       * changed to - and _ to make the encoded Base64 results more URL-SAFE.
86       * The constructor selects this table only when its urlSafe argument is true.
87       */
88      private static final byte[] URL_SAFE_ENCODE_TABLE = {
89              'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
90              'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
91              'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
92              'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
93              '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
94      };
95  
96      /**
97       * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
98       * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
99       * alphabet but fall within the bounds of the array (0x00-0x7a) are translated to -1; range-check before indexing.
100      * <p>
101      * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
102      * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
103      * </p>
104      * <p>
105      * Thanks to "commons" project in ws.apache.org for this code.
106      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
107      * </p>
108      */
109     private static final byte[] DECODE_TABLE = {
110         //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
111             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
112             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
113             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
114             52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
115             -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
116             15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
117             -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
118             41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51                      // 70-7a p-z (table ends at 'z')
119     };
120 
121     // The static final fields above are used for the original static byte[] methods on Base64.
122     // The private member fields below are used with the new streaming approach, which requires
123     // some state be preserved between calls of encode() and decode().
124 
125     /*
126      * Bit masks. Base64 uses 6-bit fields; the 4-bit and 2-bit masks are passed to validateCharacter by decode().
127      */
128     /** Mask used to extract 6 bits, used when encoding */
129     private static final int MASK_6BITS = 0x3f;
130     /** Mask used to extract 4 bits, used when decoding final trailing character. */
131     private static final int MASK_4BITS = 0xf;
132     /** Mask used to extract 2 bits, used when decoding final trailing character. */
133     private static final int MASK_2BITS = 0x3;
134 
135     /**
136      * Decodes Base64 data into octets.
137      * <p>
138      * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
139      * </p>
140      *
141      * @param base64Data
142      *            Byte array containing Base64 data
143      * @return Array containing decoded data.
144      */
145     public static byte[] decodeBase64(final byte[] base64Data) {
146         return new Base64().decode(base64Data);
147     }
148 
149     /**
150      * Decodes a Base64 String into octets.
151      * <p>
152      * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
153      * </p>
154      *
155      * @param base64String
156      *            String containing Base64 data
157      * @return Array containing decoded data.
158      * @since 1.4
159      */
160     public static byte[] decodeBase64(final String base64String) {
161         return new Base64().decode(base64String);
162     }
163 
164     // Implementation of integer encoding used for crypto
165     /**
166      * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
167      *
168      * @param pArray
169      *            a byte array containing base64 character data
170      * @return A BigInteger
171      * @since 1.4
172      */
173     public static BigInteger decodeInteger(final byte[] pArray) {
174         return new BigInteger(1, decodeBase64(pArray));
175     }
176 
177     /**
178      * Encodes binary data using the base64 algorithm but does not chunk the output.
179      *
180      * @param binaryData
181      *            binary data to encode
182      * @return byte[] containing Base64 characters in their UTF-8 representation.
183      */
184     public static byte[] encodeBase64(final byte[] binaryData) {
185         return encodeBase64(binaryData, false);
186     }
187 
188     /**
189      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
190      *
191      * @param binaryData
192      *            Array containing binary data to encode.
193      * @param isChunked
194      *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
195      * @return Base64-encoded data.
196      * @throws IllegalArgumentException
197      *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
198      */
199     public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
200         return encodeBase64(binaryData, isChunked, false);
201     }
202 
203     /**
204      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
205      *
206      * @param binaryData
207      *            Array containing binary data to encode.
208      * @param isChunked
209      *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
210      * @param urlSafe
211      *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
212      *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
213      * @return Base64-encoded data.
214      * @throws IllegalArgumentException
215      *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
216      * @since 1.4
217      */
218     public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
219         return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
220     }
221 
222     /**
223      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
224      *
225      * @param binaryData
226      *            Array containing binary data to encode.
227      * @param isChunked
228      *            if {@code true} this encoder will chunk the base64 output into 76 character blocks
229      * @param urlSafe
230      *            if {@code true} this encoder will emit - and _ instead of the usual + and / characters.
231      *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
232      * @param maxResultSize
233      *            The maximum result size to accept.
234      * @return Base64-encoded data.
235      * @throws IllegalArgumentException
236      *             Thrown when the input array needs an output array bigger than maxResultSize
237      * @since 1.4
238      */
239     public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
240                                       final boolean urlSafe, final int maxResultSize) {
241         if (BinaryCodec.isEmpty(binaryData)) {
242             return binaryData;
243         }
244 
245         // Create this so can use the super-class method
246         // Also ensures that the same roundings are performed by the ctor and the code
247         final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
248         final long len = b64.getEncodedLength(binaryData);
249         if (len > maxResultSize) {
250             throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
251                 len +
252                 ") than the specified maximum size of " +
253                 maxResultSize);
254         }
255 
256         return b64.encode(binaryData);
257     }
258 
259     /**
260      * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
261      *
262      * @param binaryData
263      *            binary data to encode
264      * @return Base64 characters chunked in 76 character blocks
265      */
266     public static byte[] encodeBase64Chunked(final byte[] binaryData) {
267         return encodeBase64(binaryData, true);
268     }
269 
270     /**
271      * Encodes binary data using the base64 algorithm but does not chunk the output.
272      *
273      * NOTE:  We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to
274      * single-line non-chunking (commons-codec-1.5).
275      *
276      * @param binaryData
277      *            binary data to encode
278      * @return String containing Base64 characters.
279      * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
280      */
281     public static String encodeBase64String(final byte[] binaryData) {
282         return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
283     }
284 
285     /**
286      * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
287      * url-safe variation emits - and _ instead of + and / characters.
288      * <b>Note: no padding is added.</b>
289      * @param binaryData
290      *            binary data to encode
291      * @return byte[] containing Base64 characters in their UTF-8 representation.
292      * @since 1.4
293      */
294     public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
295         return encodeBase64(binaryData, false, true);
296     }
297 
298     /**
299      * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
300      * url-safe variation emits - and _ instead of + and / characters.
301      * <b>Note: no padding is added.</b>
302      * @param binaryData
303      *            binary data to encode
304      * @return String containing Base64 characters
305      * @since 1.4
306      */
307     public static String encodeBase64URLSafeString(final byte[] binaryData) {
308         return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
309     }
310 
311     /**
312      * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
313      *
314      * @param bigInteger
315      *            a BigInteger
316      * @return A byte array containing base64 character data
317      * @throws NullPointerException
318      *             if null is passed in
319      * @since 1.4
320      */
321     public static byte[] encodeInteger(final BigInteger bigInteger) {
322         Objects.requireNonNull(bigInteger, "bigInteger");
323         return encodeBase64(toIntegerBytes(bigInteger), false);
324     }
325 
326     /**
327      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
328      * method treats whitespace as valid.
329      *
330      * @param arrayOctet
331      *            byte array to test
332      * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
333      *         {@code false}, otherwise
334      * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
335      */
336     @Deprecated
337     public static boolean isArrayByteBase64(final byte[] arrayOctet) {
338         return isBase64(arrayOctet);
339     }
340 
341     /**
342      * Returns whether or not the {@code octet} is in the base 64 alphabet.
343      *
344      * @param octet
345      *            The value to test
346      * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise.
347      * @since 1.4
348      */
349     public static boolean isBase64(final byte octet) {
350         return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
351     }
352 
353     /**
354      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
355      * method treats whitespace as valid.
356      *
357      * @param arrayOctet
358      *            byte array to test
359      * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
360      *         {@code false}, otherwise
361      * @since 1.5
362      */
363     public static boolean isBase64(final byte[] arrayOctet) {
364         for (final byte element : arrayOctet) {
365             if (!isBase64(element) && !Character.isWhitespace(element)) {
366                 return false;
367             }
368         }
369         return true;
370     }
371 
372     /**
373      * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
374      * method treats whitespace as valid.
375      *
376      * @param base64
377      *            String to test
378      * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if
379      *         the String is empty; {@code false}, otherwise
380      *  @since 1.5
381      */
382     public static boolean isBase64(final String base64) {
383         return isBase64(StringUtils.getBytesUtf8(base64));
384     }
385 
386     /**
387      * Returns a byte-array representation of a {@code BigInteger} without sign bit.
388      *
389      * @param bigInt
390      *            {@code BigInteger} to be converted
391      * @return a byte array representation of the BigInteger parameter
392      */
393     static byte[] toIntegerBytes(final BigInteger bigInt) {
394         int bitlen = bigInt.bitLength();
395         // round bitlen
396         bitlen = bitlen + 7 >> 3 << 3;
397         final byte[] bigBytes = bigInt.toByteArray();
398 
399         if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) {
400             return bigBytes;
401         }
402         // set up params for copying everything but sign bit
403         int startSrc = 0;
404         int len = bigBytes.length;
405 
406         // if bigInt is exactly byte-aligned, just skip signbit in copy
407         if (bigInt.bitLength() % 8 == 0) {
408             startSrc = 1;
409             len--;
410         }
411         final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
412         final byte[] resizedBytes = new byte[bitlen / 8];
413         System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
414         return resizedBytes;
415     }
416 
417     /**
418      * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
419      * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
420      * between the two modes. It is chosen once, in the constructor, from the urlSafe argument.
421      */
422     private final byte[] encodeTable;
423 
424     /** Only one decode table currently; keep for consistency with Base32 code. */
425     private final byte[] decodeTable = DECODE_TABLE;
426 
427     /**
428      * Line separator for encoding. Not used when decoding. {@code null} unless lineLength &gt; 0 and a separator was given.
429      */
430     private final byte[] lineSeparator;
431 
432     /**
433      * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
434      * {@code decodeSize = encodeSize - 1}, i.e. 3 plus the line separator length when chunking is enabled.
435      */
436     private final int decodeSize;
437 
438     /**
439      * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
440      * {@code encodeSize = 4}, plus {@code lineSeparator.length} when chunking is enabled.
441      */
442     private final int encodeSize;
443 
444     /**
445      * Creates a Base64 codec used for decoding (all modes) and encoding with the standard (URL-unsafe) alphabet.
446      * <p>
447      * When encoding, the line length is 0 (no chunking) and the encoding table is STANDARD_ENCODE_TABLE.
448      * </p>
449      *
450      * <p>
451      * When decoding, all variants are supported.
452      * </p>
453      */
454     public Base64() {
455         this(0);
456     }
457 
458     /**
459      * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
460      * <p>
461      * When encoding, the line length is 76, the line separator is CRLF, and the encoding table is chosen by urlSafe.
462      * </p>
463      *
464      * <p>
465      * When decoding, all variants are supported.
466      * </p>
467      *
468      * @param urlSafe
469      *            if {@code true}, encoding uses URL_SAFE_ENCODE_TABLE instead of STANDARD_ENCODE_TABLE. In most cases
470      *            this should be set to {@code false}.
471      * @since 1.4
472      */
473     public Base64(final boolean urlSafe) {
474         this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
475     }
476 
477     /**
478      * Creates a Base64 codec used for decoding (all modes) and encoding with the standard (URL-unsafe) alphabet.
479      * <p>
480      * When encoding, the line length is given in the constructor, the line separator is CRLF, and the encoding table
481      * is STANDARD_ENCODE_TABLE.
482      * </p>
483      * <p>
484      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
485      * </p>
486      * <p>
487      * When decoding, all variants are supported.
488      * </p>
489      *
490      * @param lineLength
491      *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
492      *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
493      *            decoding.
494      * @since 1.4
495      */
496     public Base64(final int lineLength) {
497         this(lineLength, CHUNK_SEPARATOR);
498     }
499 
500     /**
501      * Creates a Base64 codec used for decoding (all modes) and encoding with the standard (URL-unsafe) alphabet.
502      * <p>
503      * When encoding, the line length and line separator are given in the constructor, and the encoding table is
504      * STANDARD_ENCODE_TABLE.
505      * </p>
506      * <p>
507      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
508      * </p>
509      * <p>
510      * When decoding, all variants are supported.
511      * </p>
512      *
513      * @param lineLength
514      *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
515      *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
516      *            decoding.
517      * @param lineSeparator
518      *            Each line of encoded data will end with this sequence of bytes.
519      * @throws IllegalArgumentException
520      *             Thrown when the provided lineSeparator contains Base64 alphabet or pad characters.
521      * @since 1.4
522      */
523     public Base64(final int lineLength, final byte[] lineSeparator) {
524         this(lineLength, lineSeparator, false);
525     }
526 
527     /**
528      * Creates a Base64 codec used for decoding (all modes) and encoding with the alphabet selected by urlSafe.
529      * <p>
530      * When encoding, the line length and line separator are given in the constructor, and the encoding table is
531      * URL_SAFE_ENCODE_TABLE if urlSafe is {@code true}, otherwise STANDARD_ENCODE_TABLE.
532      * </p>
533      * <p>
534      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
535      * </p>
536      * <p>
537      * When decoding, all variants are supported. The decoding policy is DECODING_POLICY_DEFAULT.
538      * </p>
539      *
540      * @param lineLength
541      *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
542      *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
543      *            decoding.
544      * @param lineSeparator
545      *            Each line of encoded data will end with this sequence of bytes.
546      * @param urlSafe
547      *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
548      *            operations. Decoding seamlessly handles both modes.
549      *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
550      * @throws IllegalArgumentException
551      *             Thrown when the {@code lineSeparator} contains Base64 characters.
552      * @since 1.4
553      */
554     public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
555         this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT);
556     }
557 
558     /**
559      * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
560      * <p>
561      * When encoding the line length and line separator are given in the constructor, and the encoding table is
562      * STANDARD_ENCODE_TABLE.
563      * </p>
564      * <p>
565      * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
566      * </p>
567      * <p>
568      * When decoding all variants are supported.
569      * </p>
570      *
571      * @param lineLength
572      *            Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of
573      *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
574      *            decoding.
575      * @param lineSeparator
576      *            Each line of encoded data will end with this sequence of bytes.
577      * @param urlSafe
578      *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
579      *            operations. Decoding seamlessly handles both modes.
580      *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
581      * @param decodingPolicy The decoding policy.
582      * @throws IllegalArgumentException
583      *             Thrown when the {@code lineSeparator} contains Base64 characters.
584      * @since 1.15
585      */
586     public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe,
587                   final CodecPolicy decodingPolicy) {
588         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
589                 lineLength,
590                 lineSeparator == null ? 0 : lineSeparator.length,
591                 PAD_DEFAULT,
592                 decodingPolicy);
593         // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
594         // @see test case Base64Test.testConstructors()
595         if (lineSeparator != null) {
596             if (containsAlphabetOrPad(lineSeparator)) {
597                 final String sep = StringUtils.newStringUtf8(lineSeparator);
598                 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
599             }
600             if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE
601                 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
602                 this.lineSeparator = lineSeparator.clone();
603             } else {
604                 this.encodeSize = BYTES_PER_ENCODED_BLOCK;
605                 this.lineSeparator = null;
606             }
607         } else {
608             this.encodeSize = BYTES_PER_ENCODED_BLOCK;
609             this.lineSeparator = null;
610         }
611         this.decodeSize = this.encodeSize - 1;
612         this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
613     }
614 
615     // Implementation of the Encoder Interface
616 
617     /**
618      * <p>
619      * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
620      * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
621      * call is not necessary when decoding, but it doesn't hurt, either.
622      * </p>
623      * <p>
624      * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
625      * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
626      * garbage-out philosophy: it will not check the provided data for validity.
627      * </p>
628      * <p>
629      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
630      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
631      * </p>
632      *
633      * @param input
634      *            byte[] array of ASCII data to base64 decode.
635      * @param inPos
636      *            Position to start reading data from.
637      * @param inAvail
638      *            Amount of bytes available from input for decoding.
639      * @param context
640      *            the context to be used
641      */
642     @Override
643     void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
644         if (context.eof) {
645             return;
646         }
647         if (inAvail < 0) {
648             context.eof = true;
649         }
650         for (int i = 0; i < inAvail; i++) {
651             final byte[] buffer = ensureBufferSize(decodeSize, context);
652             final byte b = input[inPos++];
653             if (b == pad) {
654                 // We're done.
655                 context.eof = true;
656                 break;
657             }
658             if (b >= 0 && b < DECODE_TABLE.length) {
659                 final int result = DECODE_TABLE[b];
660                 if (result >= 0) {
661                     context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
662                     context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
663                     if (context.modulus == 0) {
664                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS);
665                         buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
666                         buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
667                     }
668                 }
669             }
670         }
671 
672         // Two forms of EOF as far as base64 decoder is concerned: actual
673         // EOF (-1) and first time '=' character is encountered in stream.
674         // This approach makes the '=' padding characters completely optional.
675         if (context.eof && context.modulus != 0) {
676             final byte[] buffer = ensureBufferSize(decodeSize, context);
677 
678             // We have some spare bits remaining
679             // Output all whole multiples of 8 bits and ignore the rest
680             switch (context.modulus) {
681 //              case 0 : // impossible, as excluded above
682                 case 1 : // 6 bits - either ignore entirely, or raise an exception
683                     validateTrailingCharacter();
684                     break;
685                 case 2 : // 12 bits = 8 + 4
686                     validateCharacter(MASK_4BITS, context);
687                     context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
688                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
689                     break;
690                 case 3 : // 18 bits = 8 + 8 + 2
691                     validateCharacter(MASK_2BITS, context);
692                     context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
693                     buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
694                     buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
695                     break;
696                 default:
697                     throw new IllegalStateException("Impossible modulus " + context.modulus);
698             }
699         }
700     }
701 
702     /**
703      * <p>
704      * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
705      * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
706      * remaining bytes (if not multiple of 3).
707      * </p>
708      * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
709      * <p>
710      * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
711      * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
712      * </p>
713      *
714      * @param in
715      *            byte[] array of binary data to base64 encode.
716      * @param inPos
717      *            Position to start reading data from.
718      * @param inAvail
719      *            Amount of bytes available from input for encoding.
720      * @param context
721      *            the context to be used
722      */
723     @Override
724     void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
725         if (context.eof) {
726             return;
727         }
728         // inAvail < 0 is how we're informed of EOF in the underlying data we're
729         // encoding.
730         if (inAvail < 0) {
731             context.eof = true;
732             if (0 == context.modulus && lineLength == 0) {
733                 return; // no leftovers to process and not using chunking
734             }
735             final byte[] buffer = ensureBufferSize(encodeSize, context);
736             final int savedPos = context.pos;
737             switch (context.modulus) { // 0-2
738                 case 0 : // nothing to do here
739                     break;
740                 case 1 : // 8 bits = 6 + 2
741                     // top 6 bits:
742                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6BITS];
743                     // remaining 2:
744                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6BITS];
745                     // URL-SAFE skips the padding to further reduce size.
746                     if (encodeTable == STANDARD_ENCODE_TABLE) {
747                         buffer[context.pos++] = pad;
748                         buffer[context.pos++] = pad;
749                     }
750                     break;
751 
752                 case 2 : // 16 bits = 6 + 6 + 4
753                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6BITS];
754                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6BITS];
755                     buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6BITS];
756                     // URL-SAFE skips the padding to further reduce size.
757                     if (encodeTable == STANDARD_ENCODE_TABLE) {
758                         buffer[context.pos++] = pad;
759                     }
760                     break;
761                 default:
762                     throw new IllegalStateException("Impossible modulus " + context.modulus);
763             }
764             context.currentLinePos += context.pos - savedPos; // keep track of current line position
765             // if currentPos == 0 we are at the start of a line, so don't add CRLF
766             if (lineLength > 0 && context.currentLinePos > 0) {
767                 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
768                 context.pos += lineSeparator.length;
769             }
770         } else {
771             for (int i = 0; i < inAvail; i++) {
772                 final byte[] buffer = ensureBufferSize(encodeSize, context);
773                 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
774                 int b = in[inPos++];
775                 if (b < 0) {
776                     b += 256;
777                 }
778                 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; //  BITS_PER_BYTE
779                 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
780                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6BITS];
781                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6BITS];
782                     buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6BITS];
783                     buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
784                     context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
785                     if (lineLength > 0 && lineLength <= context.currentLinePos) {
786                         System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
787                         context.pos += lineSeparator.length;
788                         context.currentLinePos = 0;
789                     }
790                 }
791             }
792         }
793     }
794 
795     /**
796      * Returns whether or not the {@code octet} is in the Base64 alphabet.
797      *
798      * @param octet
799      *            The value to test
800      * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
801      */
802     @Override
803     protected boolean isInAlphabet(final byte octet) {
804         return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
805     }
806 
807     /**
808      * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
809      *
810      * @return true if we're in URL-SAFE mode, false otherwise.
811      * @since 1.4
812      */
813     public boolean isUrlSafe() {
814         return this.encodeTable == URL_SAFE_ENCODE_TABLE;
815     }
816 
817     /**
818      * Validates whether decoding the final trailing character is possible in the context
819      * of the set of possible base 64 values.
820      * <p>
821      * The character is valid if the lower bits within the provided mask are zero. This
822      * is used to test the final trailing base-64 digit is zero in the bits that will be discarded.
823      * </p>
824      *
825      * @param emptyBitsMask The mask of the lower bits that should be empty
826      * @param context the context to be used
827      *
828      * @throws IllegalArgumentException if the bits being checked contain any non-zero value
829      */
830     private void validateCharacter(final int emptyBitsMask, final Context context) {
831         if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
832             throw new IllegalArgumentException(
833                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
834                 "base 64 alphabet but not a possible encoding. " +
835                 "Expected the discarded bits from the character to be zero.");
836         }
837     }
838 
839     /**
840      * Validates whether decoding allows an entire final trailing character that cannot be
841      * used for a complete byte.
842      *
843      * @throws IllegalArgumentException if strict decoding is enabled
844      */
845     private void validateTrailingCharacter() {
846         if (isStrictDecoding()) {
847             throw new IllegalArgumentException(
848                 "Strict decoding: Last encoded character (before the paddings if any) is a valid " +
849                 "base 64 alphabet but not a possible encoding. " +
850                 "Decoding requires at least two trailing 6-bit characters to create bytes.");
851         }
852     }
853 
854 }