View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import org.apache.commons.codec.BinaryDecoder;
21  import org.apache.commons.codec.BinaryEncoder;
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  
25  /**
26   * Provides Base64 encoding and decoding as defined by RFC 2045.
27   * 
28   * <p>
29   * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
30   * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
31   * </p>
32   * 
33   * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
34   * @author Apache Software Foundation
35   * @since 1.0-dev
36   * @version $Id: Base64.java 618419 2008-02-04 20:20:41Z ggregory $
37   */
38  public class Base64 implements BinaryEncoder, BinaryDecoder {
39  
40      /**
41       * Chunk size per RFC 2045 section 6.8.
42       * 
43       * <p>
44       * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
45       * equal signs.
46       * </p>
47       * 
48       * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
49       */
50      static final int CHUNK_SIZE = 76;
51  
52      /**
53       * Chunk separator per RFC 2045 section 2.1.
54       * 
55       * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
56       */
57      static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
58  
59      /**
60       * The base length.
61       */
62      private static final int BASELENGTH = 255;
63  
64      /**
65       * Lookup length.
66       */
67      private static final int LOOKUPLENGTH = 64;
68  
69      /**
70       * Used to calculate the number of bits in a byte.
71       */
72      private static final int EIGHTBIT = 8;
73  
74      /**
75       * Used when encoding something which has fewer than 24 bits.
76       */
77      private static final int SIXTEENBIT = 16;
78  
79      /**
80       * Used to determine how many bits data contains.
81       */
82      private static final int TWENTYFOURBITGROUP = 24;
83  
84      /**
85       * Used to get the number of Quadruples.
86       */
87      private static final int FOURBYTE = 4;
88  
89      /**
90       * Used to test the sign of a byte.
91       */
92      private static final int SIGN = -128;
93  
94      /**
95       * Byte used to pad output.
96       */
97      private static final byte PAD = (byte) '=';
98  
99      /**
100      * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
101      * indices.
102      * <p>
103      * For example, <code>base64Alphabet['+']</code> returns <code>62</code>.
104      * </p>
105      * <p>
106      * The value of undefined encodings is <code>-1</code>.
107      * </p>
108      */
109     private static final byte[] base64Alphabet = new byte[BASELENGTH];
110 
111     /**
112      * <p>
113      * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
114      * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
115      * <code>/</code>.
116      * </p>
117      * <p>
118      * This array is accessed by using character values as indices.
119      * </p>
120      * <p>
121      * For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.
122      * </p>
123      */
124     private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
125 
126     // Populating the lookup and character arrays
127     static {
128         for (int i = 0; i < BASELENGTH; i++) {
129             base64Alphabet[i] = (byte) -1;
130         }
131         for (int i = 'Z'; i >= 'A'; i--) {
132             base64Alphabet[i] = (byte) (i - 'A');
133         }
134         for (int i = 'z'; i >= 'a'; i--) {
135             base64Alphabet[i] = (byte) (i - 'a' + 26);
136         }
137         for (int i = '9'; i >= '0'; i--) {
138             base64Alphabet[i] = (byte) (i - '0' + 52);
139         }
140 
141         base64Alphabet['+'] = 62;
142         base64Alphabet['/'] = 63;
143 
144         for (int i = 0; i <= 25; i++) {
145             lookUpBase64Alphabet[i] = (byte) ('A' + i);
146         }
147 
148         for (int i = 26, j = 0; i <= 51; i++, j++) {
149             lookUpBase64Alphabet[i] = (byte) ('a' + j);
150         }
151 
152         for (int i = 52, j = 0; i <= 61; i++, j++) {
153             lookUpBase64Alphabet[i] = (byte) ('0' + j);
154         }
155 
156         lookUpBase64Alphabet[62] = (byte) '+';
157         lookUpBase64Alphabet[63] = (byte) '/';
158     }
159 
160     /**
161      * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
162      * 
163      * @param octect
164      *            The value to test
165      * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
166      */
167     private static boolean isBase64(byte octect) {
168         if (octect == PAD) {
169             return true;
170         } else if (octect < 0 || base64Alphabet[octect] == -1) {
171             return false;
172         } else {
173             return true;
174         }
175     }
176 
177     /**
178      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
179      * 
180      * @param arrayOctect
181      *            byte array to test
182      * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
183      *         empty; false, otherwise
184      */
185     public static boolean isArrayByteBase64(byte[] arrayOctect) {
186 
187         arrayOctect = discardWhitespace(arrayOctect);
188 
189         int length = arrayOctect.length;
190         if (length == 0) {
191             // shouldn't a 0 length array be valid base64 data?
192             // return false;
193             return true;
194         }
195         for (int i = 0; i < length; i++) {
196             if (!isBase64(arrayOctect[i])) {
197                 return false;
198             }
199         }
200         return true;
201     }
202 
203     /**
204      * Encodes binary data using the base64 algorithm but does not chunk the output.
205      * 
206      * @param binaryData
207      *            binary data to encode
208      * @return Base64 characters
209      */
210     public static byte[] encodeBase64(byte[] binaryData) {
211         return encodeBase64(binaryData, false);
212     }
213 
214     /**
215      * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
216      * 
217      * @param binaryData
218      *            binary data to encode
219      * @return Base64 characters chunked in 76 character blocks
220      */
221     public static byte[] encodeBase64Chunked(byte[] binaryData) {
222         return encodeBase64(binaryData, true);
223     }
224 
225     /**
226      * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
227      * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[].
228      * 
229      * @param pObject
230      *            Object to decode
231      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] supplied.
232      * @throws DecoderException
233      *             if the parameter supplied is not of type byte[]
234      */
235     public Object decode(Object pObject) throws DecoderException {
236         if (!(pObject instanceof byte[])) {
237             throw new DecoderException("Parameter supplied to Base64 decode is not a byte[]");
238         }
239         return decode((byte[]) pObject);
240     }
241 
242     /**
243      * Decodes a byte[] containing containing characters in the Base64 alphabet.
244      * 
245      * @param pArray
246      *            A byte array containing Base64 character data
247      * @return a byte array containing binary data
248      */
249     public byte[] decode(byte[] pArray) {
250         return decodeBase64(pArray);
251     }
252 
253     /**
254      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
255      * 
256      * @param binaryData
257      *            Array containing binary data to encode.
258      * @param isChunked
259      *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
260      * @return Base64-encoded data.
261      * @throws IllegalArgumentException
262      *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
263      */
264     public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
265         long binaryDataLength = binaryData.length;
266         long lengthDataBits = binaryDataLength * EIGHTBIT;
267         long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
268         long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
269         long encodedDataLengthLong = 0;
270         int chunckCount = 0;
271 
272         if (fewerThan24bits != 0) {
273             // data not divisible by 24 bit
274             encodedDataLengthLong = (tripletCount + 1) * 4;
275         } else {
276             // 16 or 8 bit
277             encodedDataLengthLong = tripletCount * 4;
278         }
279 
280         // If the output is to be "chunked" into 76 character sections,
281         // for compliance with RFC 2045 MIME, then it is important to
282         // allow for extra length to account for the separator(s)
283         if (isChunked) {
284 
285             chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
286                     .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
287             encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
288         }
289 
290         if (encodedDataLengthLong > Integer.MAX_VALUE) {
291             throw new IllegalArgumentException(
292                     "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
293         }
294         int encodedDataLength = (int) encodedDataLengthLong;
295         byte encodedData[] = new byte[encodedDataLength];
296 
297         byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0;
298 
299         int encodedIndex = 0;
300         int dataIndex = 0;
301         int i = 0;
302         int nextSeparatorIndex = CHUNK_SIZE;
303         int chunksSoFar = 0;
304 
305         // log.debug("number of triplets = " + numberTriplets);
306         for (i = 0; i < tripletCount; i++) {
307             dataIndex = i * 3;
308             b1 = binaryData[dataIndex];
309             b2 = binaryData[dataIndex + 1];
310             b3 = binaryData[dataIndex + 2];
311 
312             // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
313 
314             l = (byte) (b2 & 0x0f);
315             k = (byte) (b1 & 0x03);
316 
317             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
318             byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
319             byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
320 
321             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
322             // log.debug( "val2 = " + val2 );
323             // log.debug( "k4 = " + (k<<4) );
324             // log.debug( "vak = " + (val2 | (k<<4)) );
325             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
326             encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
327             encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
328 
329             encodedIndex += 4;
330 
331             // If we are chunking, let's put a chunk separator down.
332             if (isChunked) {
333                 // this assumes that CHUNK_SIZE % 4 == 0
334                 if (encodedIndex == nextSeparatorIndex) {
335                     System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
336                     chunksSoFar++;
337                     nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
338                     encodedIndex += CHUNK_SEPARATOR.length;
339                 }
340             }
341         }
342 
343         // form integral number of 6-bit groups
344         dataIndex = i * 3;
345 
346         if (fewerThan24bits == EIGHTBIT) {
347             b1 = binaryData[dataIndex];
348             k = (byte) (b1 & 0x03);
349             // log.debug("b1=" + b1);
350             // log.debug("b1<<2 = " + (b1>>2) );
351             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
352             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
353             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
354             encodedData[encodedIndex + 2] = PAD;
355             encodedData[encodedIndex + 3] = PAD;
356         } else if (fewerThan24bits == SIXTEENBIT) {
357 
358             b1 = binaryData[dataIndex];
359             b2 = binaryData[dataIndex + 1];
360             l = (byte) (b2 & 0x0f);
361             k = (byte) (b1 & 0x03);
362 
363             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
364             byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
365 
366             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
367             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
368             encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
369             encodedData[encodedIndex + 3] = PAD;
370         }
371 
372         if (isChunked) {
373             // we also add a separator to the end of the final chunk.
374             if (chunksSoFar < chunckCount) {
375                 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
376                         CHUNK_SEPARATOR.length);
377             }
378         }
379 
380         return encodedData;
381     }
382 
383     /**
384      * Decodes Base64 data into octects
385      * 
386      * @param base64Data
387      *            Byte array containing Base64 data
388      * @return Array containing decoded data.
389      */
390     public static byte[] decodeBase64(byte[] base64Data) {
391         // RFC 2045 requires that we discard ALL non-Base64 characters
392         base64Data = discardNonBase64(base64Data);
393 
394         // handle the edge case, so we don't have to worry about it later
395         if (base64Data.length == 0) {
396             return new byte[0];
397         }
398 
399         int numberQuadruple = base64Data.length / FOURBYTE;
400         byte decodedData[] = null;
401         byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0;
402 
403         // Throw away anything not in base64Data
404 
405         int encodedIndex = 0;
406         int dataIndex = 0;
407         {
408             // this sizes the output array properly - rlw
409             int lastData = base64Data.length;
410             // ignore the '=' padding
411             while (base64Data[lastData - 1] == PAD) {
412                 if (--lastData == 0) {
413                     return new byte[0];
414                 }
415             }
416             decodedData = new byte[lastData - numberQuadruple];
417         }
418 
419         for (int i = 0; i < numberQuadruple; i++) {
420             dataIndex = i * 4;
421             marker0 = base64Data[dataIndex + 2];
422             marker1 = base64Data[dataIndex + 3];
423 
424             b1 = base64Alphabet[base64Data[dataIndex]];
425             b2 = base64Alphabet[base64Data[dataIndex + 1]];
426 
427             if (marker0 != PAD && marker1 != PAD) {
428                 // No PAD e.g 3cQl
429                 b3 = base64Alphabet[marker0];
430                 b4 = base64Alphabet[marker1];
431 
432                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
433                 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
434                 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
435             } else if (marker0 == PAD) {
436                 // Two PAD e.g. 3c[Pad][Pad]
437                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
438             } else if (marker1 == PAD) {
439                 // One PAD e.g. 3cQ[Pad]
440                 b3 = base64Alphabet[marker0];
441 
442                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
443                 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
444             }
445             encodedIndex += 3;
446         }
447         return decodedData;
448     }
449 
450     /**
451      * Discards any whitespace from a base-64 encoded block.
452      * 
453      * @param data
454      *            The base-64 encoded data to discard the whitespace from.
455      * @return The data, less whitespace (see RFC 2045).
456      */
457     static byte[] discardWhitespace(byte[] data) {
458         byte groomedData[] = new byte[data.length];
459         int bytesCopied = 0;
460 
461         for (int i = 0; i < data.length; i++) {
462             switch (data[i]) {
463                 case (byte) ' ' :
464                 case (byte) '\n' :
465                 case (byte) '\r' :
466                 case (byte) '\t' :
467                     break;
468                 default :
469                     groomedData[bytesCopied++] = data[i];
470             }
471         }
472 
473         byte packedData[] = new byte[bytesCopied];
474 
475         System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
476 
477         return packedData;
478     }
479 
480     /**
481      * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
482      * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
483      * 
484      * @param data
485      *            The base-64 encoded data to groom
486      * @return The data, less non-base64 characters (see RFC 2045).
487      */
488     static byte[] discardNonBase64(byte[] data) {
489         byte groomedData[] = new byte[data.length];
490         int bytesCopied = 0;
491 
492         for (int i = 0; i < data.length; i++) {
493             if (isBase64(data[i])) {
494                 groomedData[bytesCopied++] = data[i];
495             }
496         }
497 
498         byte packedData[] = new byte[bytesCopied];
499 
500         System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
501 
502         return packedData;
503     }
504 
505     // Implementation of the Encoder Interface
506 
507     /**
508      * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
509      * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
510      * 
511      * @param pObject
512      *            Object to encode
513      * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied.
514      * @throws EncoderException
515      *             if the parameter supplied is not of type byte[]
516      */
517     public Object encode(Object pObject) throws EncoderException {
518         if (!(pObject instanceof byte[])) {
519             throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]");
520         }
521         return encode((byte[]) pObject);
522     }
523 
524     /**
525      * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
526      * 
527      * @param pArray
528      *            a byte array containing binary data
529      * @return A byte array containing only Base64 character data
530      */
531     public byte[] encode(byte[] pArray) {
532         return encodeBase64(pArray, false);
533     }
534 
535 }