001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.math.BigInteger;
021
022/**
023 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
024 *
025 * <p>
026 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
027 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
028 * </p>
029 * <p>
030 * The class can be parameterized in the following manner with various constructors:
031 * </p>
032 * <ul>
033 * <li>URL-safe mode: Default off.</li>
034 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of
035 * 4 in the encoded data.</li>
036 * <li>Line separator: Default is CRLF ("\r\n")</li>
037 * </ul>
038 * <p>
039 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes.
040 * </p>
041 * <p>
042 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
043 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252,
044 * UTF-8, etc).
045 * </p>
046 * <p>
047 * This class is thread-safe.
048 * </p>
049 *
050 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
051 * @since 1.0
052 * @version $Id: Base64.html 928559 2014-11-10 02:53:54Z ggregory $
053 */
054public class Base64 extends BaseNCodec {
055
056    /**
057     * BASE32 characters are 6 bits in length.
058     * They are formed by taking a block of 3 octets to form a 24-bit string,
059     * which is converted into 4 BASE64 characters.
060     */
061    private static final int BITS_PER_ENCODED_BYTE = 6;
062    private static final int BYTES_PER_UNENCODED_BLOCK = 3;
063    private static final int BYTES_PER_ENCODED_BLOCK = 4;
064
065    /**
066     * Chunk separator per RFC 2045 section 2.1.
067     *
068     * <p>
069     * N.B. The next major release may break compatibility and make this field private.
070     * </p>
071     *
072     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
073     */
074    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
075
076    /**
077     * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet"
078     * equivalents as specified in Table 1 of RFC 2045.
079     *
080     * Thanks to "commons" project in ws.apache.org for this code.
081     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
082     */
083    private static final byte[] STANDARD_ENCODE_TABLE = {
084            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
085            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
086            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
087            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
088            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
089    };
090
091    /**
092     * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
093     * changed to - and _ to make the encoded Base64 results more URL-SAFE.
094     * This table is only used when the Base64's mode is set to URL-SAFE.
095     */
096    private static final byte[] URL_SAFE_ENCODE_TABLE = {
097            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
098            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
099            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
100            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
101            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
102    };
103
104    /**
105     * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified
106     * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64
107     * alphabet but fall within the bounds of the array are translated to -1.
108     *
109     * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both
110     * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
111     *
112     * Thanks to "commons" project in ws.apache.org for this code.
113     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
114     */
115    private static final byte[] DECODE_TABLE = {
116            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
117            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118            -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54,
119            55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4,
120            5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
121            24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34,
122            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
123    };
124
125    /**
126     * Base64 uses 6-bit fields.
127     */
128    /** Mask used to extract 6 bits, used when encoding */
129    private static final int MASK_6BITS = 0x3f;
130
131    // The static final fields above are used for the original static byte[] methods on Base64.
132    // The private member fields below are used with the new streaming approach, which requires
133    // some state be preserved between calls of encode() and decode().
134
135    /**
136     * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
137     * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
138     * between the two modes.
139     */
140    private final byte[] encodeTable;
141
142    // Only one decode table currently; keep for consistency with Base32 code
143    private final byte[] decodeTable = DECODE_TABLE;
144
145    /**
146     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
147     */
148    private final byte[] lineSeparator;
149
150    /**
151     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
152     * <code>decodeSize = 3 + lineSeparator.length;</code>
153     */
154    private final int decodeSize;
155
156    /**
157     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
158     * <code>encodeSize = 4 + lineSeparator.length;</code>
159     */
160    private final int encodeSize;
161
162    /**
163     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
164     * <p>
165     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
166     * </p>
167     *
168     * <p>
169     * When decoding all variants are supported.
170     * </p>
171     */
172    public Base64() {
173        this(0);
174    }
175
176    /**
177     * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
178     * <p>
179     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
180     * </p>
181     *
182     * <p>
183     * When decoding all variants are supported.
184     * </p>
185     *
186     * @param urlSafe
187     *            if <code>true</code>, URL-safe encoding is used. In most cases this should be set to
188     *            <code>false</code>.
189     * @since 1.4
190     */
191    public Base64(final boolean urlSafe) {
192        this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
193    }
194
195    /**
196     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
197     * <p>
198     * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
199     * STANDARD_ENCODE_TABLE.
200     * </p>
201     * <p>
202     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
203     * </p>
204     * <p>
205     * When decoding all variants are supported.
206     * </p>
207     *
208     * @param lineLength
209     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
210     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
211     *            decoding.
212     * @since 1.4
213     */
214    public Base64(final int lineLength) {
215        this(lineLength, CHUNK_SEPARATOR);
216    }
217
218    /**
219     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
220     * <p>
221     * When encoding the line length and line separator are given in the constructor, and the encoding table is
222     * STANDARD_ENCODE_TABLE.
223     * </p>
224     * <p>
225     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
226     * </p>
227     * <p>
228     * When decoding all variants are supported.
229     * </p>
230     *
231     * @param lineLength
232     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
233     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
234     *            decoding.
235     * @param lineSeparator
236     *            Each line of encoded data will end with this sequence of bytes.
237     * @throws IllegalArgumentException
238     *             Thrown when the provided lineSeparator included some base64 characters.
239     * @since 1.4
240     */
241    public Base64(final int lineLength, final byte[] lineSeparator) {
242        this(lineLength, lineSeparator, false);
243    }
244
245    /**
246     * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
247     * <p>
248     * When encoding the line length and line separator are given in the constructor, and the encoding table is
249     * STANDARD_ENCODE_TABLE.
250     * </p>
251     * <p>
252     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
253     * </p>
254     * <p>
255     * When decoding all variants are supported.
256     * </p>
257     *
258     * @param lineLength
259     *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
260     *            4). If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when
261     *            decoding.
262     * @param lineSeparator
263     *            Each line of encoded data will end with this sequence of bytes.
264     * @param urlSafe
265     *            Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
266     *            operations. Decoding seamlessly handles both modes.
267     *            <b>Note: no padding is added when using the URL-safe alphabet.</b>
268     * @throws IllegalArgumentException
269     *             The provided lineSeparator included some base64 characters. That's not going to work!
270     * @since 1.4
271     */
272    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
273        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
274                lineLength,
275                lineSeparator == null ? 0 : lineSeparator.length);
276        // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
277        // @see test case Base64Test.testConstructors()
278        if (lineSeparator != null) {
279            if (containsAlphabetOrPad(lineSeparator)) {
280                final String sep = StringUtils.newStringUtf8(lineSeparator);
281                throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
282            }
283            if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE
284                this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
285                this.lineSeparator = new byte[lineSeparator.length];
286                System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
287            } else {
288                this.encodeSize = BYTES_PER_ENCODED_BLOCK;
289                this.lineSeparator = null;
290            }
291        } else {
292            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
293            this.lineSeparator = null;
294        }
295        this.decodeSize = this.encodeSize - 1;
296        this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
297    }
298
299    /**
300     * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
301     *
302     * @return true if we're in URL-SAFE mode, false otherwise.
303     * @since 1.4
304     */
305    public boolean isUrlSafe() {
306        return this.encodeTable == URL_SAFE_ENCODE_TABLE;
307    }
308
309    /**
310     * <p>
311     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
312     * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last
313     * remaining bytes (if not multiple of 3).
314     * </p>
315     * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p>
316     * <p>
317     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
318     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
319     * </p>
320     *
321     * @param in
322     *            byte[] array of binary data to base64 encode.
323     * @param inPos
324     *            Position to start reading data from.
325     * @param inAvail
326     *            Amount of bytes available from input for encoding.
327     * @param context
328     *            the context to be used
329     */
330    @Override
331    void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
332        if (context.eof) {
333            return;
334        }
335        // inAvail < 0 is how we're informed of EOF in the underlying data we're
336        // encoding.
337        if (inAvail < 0) {
338            context.eof = true;
339            if (0 == context.modulus && lineLength == 0) {
340                return; // no leftovers to process and not using chunking
341            }
342            final byte[] buffer = ensureBufferSize(encodeSize, context);
343            final int savedPos = context.pos;
344            switch (context.modulus) { // 0-2
345                case 0 : // nothing to do here
346                    break;
347                case 1 : // 8 bits = 6 + 2
348                    // top 6 bits:
349                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS];
350                    // remaining 2:
351                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS];
352                    // URL-SAFE skips the padding to further reduce size.
353                    if (encodeTable == STANDARD_ENCODE_TABLE) {
354                        buffer[context.pos++] = pad;
355                        buffer[context.pos++] = pad;
356                    }
357                    break;
358
359                case 2 : // 16 bits = 6 + 6 + 4
360                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
361                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
362                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
363                    // URL-SAFE skips the padding to further reduce size.
364                    if (encodeTable == STANDARD_ENCODE_TABLE) {
365                        buffer[context.pos++] = pad;
366                    }
367                    break;
368                default:
369                    throw new IllegalStateException("Impossible modulus "+context.modulus);
370            }
371            context.currentLinePos += context.pos - savedPos; // keep track of current line position
372            // if currentPos == 0 we are at the start of a line, so don't add CRLF
373            if (lineLength > 0 && context.currentLinePos > 0) {
374                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
375                context.pos += lineSeparator.length;
376            }
377        } else {
378            for (int i = 0; i < inAvail; i++) {
379                final byte[] buffer = ensureBufferSize(encodeSize, context);
380                context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK;
381                int b = in[inPos++];
382                if (b < 0) {
383                    b += 256;
384                }
385                context.ibitWorkArea = (context.ibitWorkArea << 8) + b; //  BITS_PER_BYTE
386                if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
387                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
388                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
389                    buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
390                    buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
391                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
392                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
393                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
394                        context.pos += lineSeparator.length;
395                        context.currentLinePos = 0;
396                    }
397                }
398            }
399        }
400    }
401
402    /**
403     * <p>
404     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
405     * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
406     * call is not necessary when decoding, but it doesn't hurt, either.
407     * </p>
408     * <p>
409     * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
410     * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
411     * garbage-out philosophy: it will not check the provided data for validity.
412     * </p>
413     * <p>
414     * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
415     * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
416     * </p>
417     *
418     * @param in
419     *            byte[] array of ascii data to base64 decode.
420     * @param inPos
421     *            Position to start reading data from.
422     * @param inAvail
423     *            Amount of bytes available from input for encoding.
424     * @param context
425     *            the context to be used
426     */
427    @Override
428    void decode(final byte[] in, int inPos, final int inAvail, final Context context) {
429        if (context.eof) {
430            return;
431        }
432        if (inAvail < 0) {
433            context.eof = true;
434        }
435        for (int i = 0; i < inAvail; i++) {
436            final byte[] buffer = ensureBufferSize(decodeSize, context);
437            final byte b = in[inPos++];
438            if (b == pad) {
439                // We're done.
440                context.eof = true;
441                break;
442            } else {
443                if (b >= 0 && b < DECODE_TABLE.length) {
444                    final int result = DECODE_TABLE[b];
445                    if (result >= 0) {
446                        context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK;
447                        context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
448                        if (context.modulus == 0) {
449                            buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
450                            buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
451                            buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
452                        }
453                    }
454                }
455            }
456        }
457
458        // Two forms of EOF as far as base64 decoder is concerned: actual
459        // EOF (-1) and first time '=' character is encountered in stream.
460        // This approach makes the '=' padding characters completely optional.
461        if (context.eof && context.modulus != 0) {
462            final byte[] buffer = ensureBufferSize(decodeSize, context);
463
464            // We have some spare bits remaining
465            // Output all whole multiples of 8 bits and ignore the rest
466            switch (context.modulus) {
467//              case 0 : // impossible, as excluded above
468                case 1 : // 6 bits - ignore entirely
469                    // TODO not currently tested; perhaps it is impossible?
470                    break;
471                case 2 : // 12 bits = 8 + 4
472                    context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
473                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
474                    break;
475                case 3 : // 18 bits = 8 + 8 + 2
476                    context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
477                    buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
478                    buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
479                    break;
480                default:
481                    throw new IllegalStateException("Impossible modulus "+context.modulus);
482            }
483        }
484    }
485
486    /**
487     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
488     * method treats whitespace as valid.
489     *
490     * @param arrayOctet
491     *            byte array to test
492     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
493     *         <code>false</code>, otherwise
494     * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
495     */
496    @Deprecated
497    public static boolean isArrayByteBase64(final byte[] arrayOctet) {
498        return isBase64(arrayOctet);
499    }
500
501    /**
502     * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
503     *
504     * @param octet
505     *            The value to test
506     * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
507     * @since 1.4
508     */
509    public static boolean isBase64(final byte octet) {
510        return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
511    }
512
513    /**
514     * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the
515     * method treats whitespace as valid.
516     *
517     * @param base64
518     *            String to test
519     * @return <code>true</code> if all characters in the String are valid characters in the Base64 alphabet or if
520     *         the String is empty; <code>false</code>, otherwise
521     *  @since 1.5
522     */
523    public static boolean isBase64(final String base64) {
524        return isBase64(StringUtils.getBytesUtf8(base64));
525    }
526
527    /**
528     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
529     * method treats whitespace as valid.
530     *
531     * @param arrayOctet
532     *            byte array to test
533     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
534     *         <code>false</code>, otherwise
535     * @since 1.5
536     */
537    public static boolean isBase64(final byte[] arrayOctet) {
538        for (int i = 0; i < arrayOctet.length; i++) {
539            if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
540                return false;
541            }
542        }
543        return true;
544    }
545
546    /**
547     * Encodes binary data using the base64 algorithm but does not chunk the output.
548     *
549     * @param binaryData
550     *            binary data to encode
551     * @return byte[] containing Base64 characters in their UTF-8 representation.
552     */
553    public static byte[] encodeBase64(final byte[] binaryData) {
554        return encodeBase64(binaryData, false);
555    }
556
557    /**
558     * Encodes binary data using the base64 algorithm but does not chunk the output.
559     *
560     * NOTE:  We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to
561     * single-line non-chunking (commons-codec-1.5).
562     *
563     * @param binaryData
564     *            binary data to encode
565     * @return String containing Base64 characters.
566     * @since 1.4 (NOTE:  1.4 chunked the output, whereas 1.5 does not).
567     */
568    public static String encodeBase64String(final byte[] binaryData) {
569        return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
570    }
571
572    /**
573     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
574     * url-safe variation emits - and _ instead of + and / characters.
575     * <b>Note: no padding is added.</b>
576     * @param binaryData
577     *            binary data to encode
578     * @return byte[] containing Base64 characters in their UTF-8 representation.
579     * @since 1.4
580     */
581    public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
582        return encodeBase64(binaryData, false, true);
583    }
584
585    /**
586     * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
587     * url-safe variation emits - and _ instead of + and / characters.
588     * <b>Note: no padding is added.</b>
589     * @param binaryData
590     *            binary data to encode
591     * @return String containing Base64 characters
592     * @since 1.4
593     */
594    public static String encodeBase64URLSafeString(final byte[] binaryData) {
595        return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
596    }
597
598    /**
599     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
600     *
601     * @param binaryData
602     *            binary data to encode
603     * @return Base64 characters chunked in 76 character blocks
604     */
605    public static byte[] encodeBase64Chunked(final byte[] binaryData) {
606        return encodeBase64(binaryData, true);
607    }
608
609    /**
610     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
611     *
612     * @param binaryData
613     *            Array containing binary data to encode.
614     * @param isChunked
615     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
616     * @return Base64-encoded data.
617     * @throws IllegalArgumentException
618     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
619     */
620    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
621        return encodeBase64(binaryData, isChunked, false);
622    }
623
624    /**
625     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
626     *
627     * @param binaryData
628     *            Array containing binary data to encode.
629     * @param isChunked
630     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
631     * @param urlSafe
632     *            if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
633     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
634     * @return Base64-encoded data.
635     * @throws IllegalArgumentException
636     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
637     * @since 1.4
638     */
639    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
640        return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
641    }
642
643    /**
644     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
645     *
646     * @param binaryData
647     *            Array containing binary data to encode.
648     * @param isChunked
649     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
650     * @param urlSafe
651     *            if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
652     *            <b>Note: no padding is added when encoding using the URL-safe alphabet.</b>
653     * @param maxResultSize
654     *            The maximum result size to accept.
655     * @return Base64-encoded data.
656     * @throws IllegalArgumentException
657     *             Thrown when the input array needs an output array bigger than maxResultSize
658     * @since 1.4
659     */
660    public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
661                                      final boolean urlSafe, final int maxResultSize) {
662        if (binaryData == null || binaryData.length == 0) {
663            return binaryData;
664        }
665
666        // Create this so can use the super-class method
667        // Also ensures that the same roundings are performed by the ctor and the code
668        final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
669        final long len = b64.getEncodedLength(binaryData);
670        if (len > maxResultSize) {
671            throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
672                len +
673                ") than the specified maximum size of " +
674                maxResultSize);
675        }
676
677        return b64.encode(binaryData);
678    }
679
680    /**
681     * Decodes a Base64 String into octets.
682     * <p>
683     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
684     * </p>
685     *
686     * @param base64String
687     *            String containing Base64 data
688     * @return Array containing decoded data.
689     * @since 1.4
690     */
691    public static byte[] decodeBase64(final String base64String) {
692        return new Base64().decode(base64String);
693    }
694
695    /**
696     * Decodes Base64 data into octets.
697     * <p>
698     * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode.
699     * </p>
700     *
701     * @param base64Data
702     *            Byte array containing Base64 data
703     * @return Array containing decoded data.
704     */
705    public static byte[] decodeBase64(final byte[] base64Data) {
706        return new Base64().decode(base64Data);
707    }
708
709    // Implementation of the Encoder Interface
710
711    // Implementation of integer encoding used for crypto
712    /**
713     * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
714     *
715     * @param pArray
716     *            a byte array containing base64 character data
717     * @return A BigInteger
718     * @since 1.4
719     */
720    public static BigInteger decodeInteger(final byte[] pArray) {
721        return new BigInteger(1, decodeBase64(pArray));
722    }
723
724    /**
725     * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
726     *
727     * @param bigInt
728     *            a BigInteger
729     * @return A byte array containing base64 character data
730     * @throws NullPointerException
731     *             if null is passed in
732     * @since 1.4
733     */
734    public static byte[] encodeInteger(final BigInteger bigInt) {
735        if (bigInt == null) {
736            throw new NullPointerException("encodeInteger called with null parameter");
737        }
738        return encodeBase64(toIntegerBytes(bigInt), false);
739    }
740
741    /**
742     * Returns a byte-array representation of a <code>BigInteger</code> without sign bit.
743     *
744     * @param bigInt
745     *            <code>BigInteger</code> to be converted
746     * @return a byte array representation of the BigInteger parameter
747     */
748    static byte[] toIntegerBytes(final BigInteger bigInt) {
749        int bitlen = bigInt.bitLength();
750        // round bitlen
751        bitlen = ((bitlen + 7) >> 3) << 3;
752        final byte[] bigBytes = bigInt.toByteArray();
753
754        if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
755            return bigBytes;
756        }
757        // set up params for copying everything but sign bit
758        int startSrc = 0;
759        int len = bigBytes.length;
760
761        // if bigInt is exactly byte-aligned, just skip signbit in copy
762        if ((bigInt.bitLength() % 8) == 0) {
763            startSrc = 1;
764            len--;
765        }
766        final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
767        final byte[] resizedBytes = new byte[bitlen / 8];
768        System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
769        return resizedBytes;
770    }
771
772    /**
773     * Returns whether or not the <code>octet</code> is in the Base64 alphabet.
774     *
775     * @param octet
776     *            The value to test
777     * @return <code>true</code> if the value is defined in the the Base64 alphabet <code>false</code> otherwise.
778     */
779    @Override
780    protected boolean isInAlphabet(final byte octet) {
781        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
782    }
783
784}