1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.codec.binary; 19 20 import java.math.BigInteger; 21 import java.util.Objects; 22 23 import org.apache.commons.codec.CodecPolicy; 24 25 /** 26 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>. 27 * 28 * <p> 29 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose 30 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. 31 * </p> 32 * <p> 33 * The class can be parameterized in the following manner with various constructors: 34 * </p> 35 * <ul> 36 * <li>URL-safe mode: Default off.</li> 37 * <li>Line length: Default 76. Line length that aren't multiples of 4 will still essentially end up being multiples of 38 * 4 in the encoded data. 39 * <li>Line separator: Default is CRLF ("\r\n")</li> 40 * </ul> 41 * <p> 42 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes. 43 * </p> 44 * <p> 45 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only 46 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, 47 * UTF-8, etc). 48 * </p> 49 * <p> 50 * This class is thread-safe. 51 * </p> 52 * 53 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 54 * @since 1.0 55 */ 56 public class Base64 extends BaseNCodec { 57 58 /** 59 * BASE64 characters are 6 bits in length. 60 * They are formed by taking a block of 3 octets to form a 24-bit string, 61 * which is converted into 4 BASE64 characters. 62 */ 63 private static final int BITS_PER_ENCODED_BYTE = 6; 64 private static final int BYTES_PER_UNENCODED_BLOCK = 3; 65 private static final int BYTES_PER_ENCODED_BLOCK = 4; 66 67 /** 68 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" 69 * equivalents as specified in Table 1 of RFC 2045. 70 * <p> 71 * Thanks to "commons" project in ws.apache.org for this code. 72 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 73 * </p> 74 */ 75 private static final byte[] STANDARD_ENCODE_TABLE = { 76 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 77 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 78 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 79 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 80 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' 81 }; 82 83 /** 84 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / 85 * changed to - and _ to make the encoded Base64 results more URL-SAFE. 86 * This table is only used when the Base64's mode is set to URL-SAFE. 87 */ 88 private static final byte[] URL_SAFE_ENCODE_TABLE = { 89 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 90 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 91 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 92 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 93 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' 94 }; 95 96 /** 97 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified 98 * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 99 * alphabet but fall within the bounds of the array are translated to -1. 100 * <p> 101 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both 102 * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). 103 * </p> 104 * <p> 105 * Thanks to "commons" project in ws.apache.org for this code. 106 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 107 * </p> 108 */ 109 private static final byte[] DECODE_TABLE = { 110 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 111 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 112 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - / 114 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 115 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O 116 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _ 117 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o 118 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z 119 }; 120 121 // The static final fields above are used for the original static byte[] methods on Base64. 122 // The private member fields below are used with the new streaming approach, which requires 123 // some state be preserved between calls of encode() and decode(). 124 125 /** 126 * Base64 uses 6-bit fields. 127 */ 128 /** Mask used to extract 6 bits, used when encoding */ 129 private static final int MASK_6BITS = 0x3f; 130 /** Mask used to extract 4 bits, used when decoding final trailing character. */ 131 private static final int MASK_4BITS = 0xf; 132 /** Mask used to extract 2 bits, used when decoding final trailing character. */ 133 private static final int MASK_2BITS = 0x3; 134 135 /** 136 * Decodes Base64 data into octets. 137 * <p> 138 * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. 139 * </p> 140 * 141 * @param base64Data 142 * Byte array containing Base64 data 143 * @return Array containing decoded data. 144 */ 145 public static byte[] decodeBase64(final byte[] base64Data) { 146 return new Base64().decode(base64Data); 147 } 148 149 /** 150 * Decodes a Base64 String into octets. 151 * <p> 152 * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. 153 * </p> 154 * 155 * @param base64String 156 * String containing Base64 data 157 * @return Array containing decoded data. 158 * @since 1.4 159 */ 160 public static byte[] decodeBase64(final String base64String) { 161 return new Base64().decode(base64String); 162 } 163 164 // Implementation of integer encoding used for crypto 165 /** 166 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. 167 * 168 * @param pArray 169 * a byte array containing base64 character data 170 * @return A BigInteger 171 * @since 1.4 172 */ 173 public static BigInteger decodeInteger(final byte[] pArray) { 174 return new BigInteger(1, decodeBase64(pArray)); 175 } 176 177 /** 178 * Encodes binary data using the base64 algorithm but does not chunk the output. 179 * 180 * @param binaryData 181 * binary data to encode 182 * @return byte[] containing Base64 characters in their UTF-8 representation. 183 */ 184 public static byte[] encodeBase64(final byte[] binaryData) { 185 return encodeBase64(binaryData, false); 186 } 187 188 /** 189 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 190 * 191 * @param binaryData 192 * Array containing binary data to encode. 193 * @param isChunked 194 * if {@code true} this encoder will chunk the base64 output into 76 character blocks 195 * @return Base64-encoded data. 196 * @throws IllegalArgumentException 197 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 198 */ 199 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) { 200 return encodeBase64(binaryData, isChunked, false); 201 } 202 203 /** 204 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 205 * 206 * @param binaryData 207 * Array containing binary data to encode. 208 * @param isChunked 209 * if {@code true} this encoder will chunk the base64 output into 76 character blocks 210 * @param urlSafe 211 * if {@code true} this encoder will emit - and _ instead of the usual + and / characters. 212 * <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> 213 * @return Base64-encoded data. 214 * @throws IllegalArgumentException 215 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 216 * @since 1.4 217 */ 218 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) { 219 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); 220 } 221 222 /** 223 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 224 * 225 * @param binaryData 226 * Array containing binary data to encode. 227 * @param isChunked 228 * if {@code true} this encoder will chunk the base64 output into 76 character blocks 229 * @param urlSafe 230 * if {@code true} this encoder will emit - and _ instead of the usual + and / characters. 231 * <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> 232 * @param maxResultSize 233 * The maximum result size to accept. 234 * @return Base64-encoded data. 235 * @throws IllegalArgumentException 236 * Thrown when the input array needs an output array bigger than maxResultSize 237 * @since 1.4 238 */ 239 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, 240 final boolean urlSafe, final int maxResultSize) { 241 if (BinaryCodec.isEmpty(binaryData)) { 242 return binaryData; 243 } 244 245 // Create this so can use the super-class method 246 // Also ensures that the same roundings are performed by the ctor and the code 247 final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); 248 final long len = b64.getEncodedLength(binaryData); 249 if (len > maxResultSize) { 250 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + 251 len + 252 ") than the specified maximum size of " + 253 maxResultSize); 254 } 255 256 return b64.encode(binaryData); 257 } 258 259 /** 260 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks 261 * 262 * @param binaryData 263 * binary data to encode 264 * @return Base64 characters chunked in 76 character blocks 265 */ 266 public static byte[] encodeBase64Chunked(final byte[] binaryData) { 267 return encodeBase64(binaryData, true); 268 } 269 270 /** 271 * Encodes binary data using the base64 algorithm but does not chunk the output. 272 * 273 * NOTE: We changed the behavior of this method from multi-line chunking (commons-codec-1.4) to 274 * single-line non-chunking (commons-codec-1.5). 275 * 276 * @param binaryData 277 * binary data to encode 278 * @return String containing Base64 characters. 279 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not). 280 */ 281 public static String encodeBase64String(final byte[] binaryData) { 282 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false)); 283 } 284 285 /** 286 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 287 * url-safe variation emits - and _ instead of + and / characters. 288 * <b>Note: no padding is added.</b> 289 * @param binaryData 290 * binary data to encode 291 * @return byte[] containing Base64 characters in their UTF-8 representation. 292 * @since 1.4 293 */ 294 public static byte[] encodeBase64URLSafe(final byte[] binaryData) { 295 return encodeBase64(binaryData, false, true); 296 } 297 298 /** 299 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 300 * url-safe variation emits - and _ instead of + and / characters. 301 * <b>Note: no padding is added.</b> 302 * @param binaryData 303 * binary data to encode 304 * @return String containing Base64 characters 305 * @since 1.4 306 */ 307 public static String encodeBase64URLSafeString(final byte[] binaryData) { 308 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true)); 309 } 310 311 /** 312 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. 313 * 314 * @param bigInteger 315 * a BigInteger 316 * @return A byte array containing base64 character data 317 * @throws NullPointerException 318 * if null is passed in 319 * @since 1.4 320 */ 321 public static byte[] encodeInteger(final BigInteger bigInteger) { 322 Objects.requireNonNull(bigInteger, "bigInteger"); 323 return encodeBase64(toIntegerBytes(bigInteger), false); 324 } 325 326 /** 327 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the 328 * method treats whitespace as valid. 329 * 330 * @param arrayOctet 331 * byte array to test 332 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; 333 * {@code false}, otherwise 334 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0. 335 */ 336 @Deprecated 337 public static boolean isArrayByteBase64(final byte[] arrayOctet) { 338 return isBase64(arrayOctet); 339 } 340 341 /** 342 * Returns whether or not the {@code octet} is in the base 64 alphabet. 343 * 344 * @param octet 345 * The value to test 346 * @return {@code true} if the value is defined in the base 64 alphabet, {@code false} otherwise. 347 * @since 1.4 348 */ 349 public static boolean isBase64(final byte octet) { 350 return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1; 351 } 352 353 /** 354 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the 355 * method treats whitespace as valid. 356 * 357 * @param arrayOctet 358 * byte array to test 359 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; 360 * {@code false}, otherwise 361 * @since 1.5 362 */ 363 public static boolean isBase64(final byte[] arrayOctet) { 364 for (final byte element : arrayOctet) { 365 if (!isBase64(element) && !Character.isWhitespace(element)) { 366 return false; 367 } 368 } 369 return true; 370 } 371 372 /** 373 * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the 374 * method treats whitespace as valid. 375 * 376 * @param base64 377 * String to test 378 * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if 379 * the String is empty; {@code false}, otherwise 380 * @since 1.5 381 */ 382 public static boolean isBase64(final String base64) { 383 return isBase64(StringUtils.getBytesUtf8(base64)); 384 } 385 386 /** 387 * Returns a byte-array representation of a {@code BigInteger} without sign bit. 388 * 389 * @param bigInt 390 * {@code BigInteger} to be converted 391 * @return a byte array representation of the BigInteger parameter 392 */ 393 static byte[] toIntegerBytes(final BigInteger bigInt) { 394 int bitlen = bigInt.bitLength(); 395 // round bitlen 396 bitlen = bitlen + 7 >> 3 << 3; 397 final byte[] bigBytes = bigInt.toByteArray(); 398 399 if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) { 400 return bigBytes; 401 } 402 // set up params for copying everything but sign bit 403 int startSrc = 0; 404 int len = bigBytes.length; 405 406 // if bigInt is exactly byte-aligned, just skip signbit in copy 407 if (bigInt.bitLength() % 8 == 0) { 408 startSrc = 1; 409 len--; 410 } 411 final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec 412 final byte[] resizedBytes = new byte[bitlen / 8]; 413 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); 414 return resizedBytes; 415 } 416 417 /** 418 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able 419 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch 420 * between the two modes. 421 */ 422 private final byte[] encodeTable; 423 424 /** Only one decode table currently; keep for consistency with Base32 code. */ 425 private final byte[] decodeTable = DECODE_TABLE; 426 427 /** 428 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 429 */ 430 private final byte[] lineSeparator; 431 432 /** 433 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 434 * {@code decodeSize = 3 + lineSeparator.length;} 435 */ 436 private final int decodeSize; 437 438 /** 439 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 440 * {@code encodeSize = 4 + lineSeparator.length;} 441 */ 442 private final int encodeSize; 443 444 /** 445 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 446 * <p> 447 * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE. 448 * </p> 449 * 450 * <p> 451 * When decoding all variants are supported. 452 * </p> 453 */ 454 public Base64() { 455 this(0); 456 } 457 458 /** 459 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. 460 * <p> 461 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 462 * </p> 463 * 464 * <p> 465 * When decoding all variants are supported. 466 * </p> 467 * 468 * @param urlSafe 469 * if {@code true}, URL-safe encoding is used. In most cases this should be set to 470 * {@code false}. 471 * @since 1.4 472 */ 473 public Base64(final boolean urlSafe) { 474 this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); 475 } 476 477 /** 478 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 479 * <p> 480 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is 481 * STANDARD_ENCODE_TABLE. 482 * </p> 483 * <p> 484 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 485 * </p> 486 * <p> 487 * When decoding all variants are supported. 488 * </p> 489 * 490 * @param lineLength 491 * Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 492 * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 493 * decoding. 494 * @since 1.4 495 */ 496 public Base64(final int lineLength) { 497 this(lineLength, CHUNK_SEPARATOR); 498 } 499 500 /** 501 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 502 * <p> 503 * When encoding the line length and line separator are given in the constructor, and the encoding table is 504 * STANDARD_ENCODE_TABLE. 505 * </p> 506 * <p> 507 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 508 * </p> 509 * <p> 510 * When decoding all variants are supported. 511 * </p> 512 * 513 * @param lineLength 514 * Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 515 * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 516 * decoding. 517 * @param lineSeparator 518 * Each line of encoded data will end with this sequence of bytes. 519 * @throws IllegalArgumentException 520 * Thrown when the provided lineSeparator included some base64 characters. 521 * @since 1.4 522 */ 523 public Base64(final int lineLength, final byte[] lineSeparator) { 524 this(lineLength, lineSeparator, false); 525 } 526 527 /** 528 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 529 * <p> 530 * When encoding the line length and line separator are given in the constructor, and the encoding table is 531 * STANDARD_ENCODE_TABLE. 532 * </p> 533 * <p> 534 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 535 * </p> 536 * <p> 537 * When decoding all variants are supported. 538 * </p> 539 * 540 * @param lineLength 541 * Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 542 * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 543 * decoding. 544 * @param lineSeparator 545 * Each line of encoded data will end with this sequence of bytes. 546 * @param urlSafe 547 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode 548 * operations. Decoding seamlessly handles both modes. 549 * <b>Note: no padding is added when using the URL-safe alphabet.</b> 550 * @throws IllegalArgumentException 551 * Thrown when the {@code lineSeparator} contains Base64 characters. 552 * @since 1.4 553 */ 554 public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) { 555 this(lineLength, lineSeparator, urlSafe, DECODING_POLICY_DEFAULT); 556 } 557 558 /** 559 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 560 * <p> 561 * When encoding the line length and line separator are given in the constructor, and the encoding table is 562 * STANDARD_ENCODE_TABLE. 563 * </p> 564 * <p> 565 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 566 * </p> 567 * <p> 568 * When decoding all variants are supported. 569 * </p> 570 * 571 * @param lineLength 572 * Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 573 * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 574 * decoding. 575 * @param lineSeparator 576 * Each line of encoded data will end with this sequence of bytes. 577 * @param urlSafe 578 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode 579 * operations. Decoding seamlessly handles both modes. 580 * <b>Note: no padding is added when using the URL-safe alphabet.</b> 581 * @param decodingPolicy The decoding policy. 582 * @throws IllegalArgumentException 583 * Thrown when the {@code lineSeparator} contains Base64 characters. 584 * @since 1.15 585 */ 586 public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, 587 final CodecPolicy decodingPolicy) { 588 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 589 lineLength, 590 lineSeparator == null ? 0 : lineSeparator.length, 591 PAD_DEFAULT, 592 decodingPolicy); 593 // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0 594 // @see test case Base64Test.testConstructors() 595 if (lineSeparator != null) { 596 if (containsAlphabetOrPad(lineSeparator)) { 597 final String sep = StringUtils.newStringUtf8(lineSeparator); 598 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]"); 599 } 600 if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE 601 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 602 this.lineSeparator = lineSeparator.clone(); 603 } else { 604 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 605 this.lineSeparator = null; 606 } 607 } else { 608 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 609 this.lineSeparator = null; 610 } 611 this.decodeSize = this.encodeSize - 1; 612 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; 613 } 614 615 // Implementation of the Encoder Interface 616 617 /** 618 * <p> 619 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 620 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 621 * call is not necessary when decoding, but it doesn't hurt, either. 622 * </p> 623 * <p> 624 * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 625 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 626 * garbage-out philosophy: it will not check the provided data for validity. 627 * </p> 628 * <p> 629 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 630 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 631 * </p> 632 * 633 * @param input 634 * byte[] array of ASCII data to base64 decode. 635 * @param inPos 636 * Position to start reading data from. 637 * @param inAvail 638 * Amount of bytes available from input for decoding. 639 * @param context 640 * the context to be used 641 */ 642 @Override 643 void decode(final byte[] input, int inPos, final int inAvail, final Context context) { 644 if (context.eof) { 645 return; 646 } 647 if (inAvail < 0) { 648 context.eof = true; 649 } 650 for (int i = 0; i < inAvail; i++) { 651 final byte[] buffer = ensureBufferSize(decodeSize, context); 652 final byte b = input[inPos++]; 653 if (b == pad) { 654 // We're done. 655 context.eof = true; 656 break; 657 } 658 if (b >= 0 && b < DECODE_TABLE.length) { 659 final int result = DECODE_TABLE[b]; 660 if (result >= 0) { 661 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; 662 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; 663 if (context.modulus == 0) { 664 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS); 665 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS); 666 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 667 } 668 } 669 } 670 } 671 672 // Two forms of EOF as far as base64 decoder is concerned: actual 673 // EOF (-1) and first time '=' character is encountered in stream. 674 // This approach makes the '=' padding characters completely optional. 675 if (context.eof && context.modulus != 0) { 676 final byte[] buffer = ensureBufferSize(decodeSize, context); 677 678 // We have some spare bits remaining 679 // Output all whole multiples of 8 bits and ignore the rest 680 switch (context.modulus) { 681 // case 0 : // impossible, as excluded above 682 case 1 : // 6 bits - either ignore entirely, or raise an exception 683 validateTrailingCharacter(); 684 break; 685 case 2 : // 12 bits = 8 + 4 686 validateCharacter(MASK_4BITS, context); 687 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits 688 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 689 break; 690 case 3 : // 18 bits = 8 + 8 + 2 691 validateCharacter(MASK_2BITS, context); 692 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits 693 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS); 694 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 695 break; 696 default: 697 throw new IllegalStateException("Impossible modulus " + context.modulus); 698 } 699 } 700 } 701 702 /** 703 * <p> 704 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 705 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last 706 * remaining bytes (if not multiple of 3). 707 * </p> 708 * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p> 709 * <p> 710 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 711 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 712 * </p> 713 * 714 * @param in 715 * byte[] array of binary data to base64 encode. 716 * @param inPos 717 * Position to start reading data from. 718 * @param inAvail 719 * Amount of bytes available from input for encoding. 720 * @param context 721 * the context to be used 722 */ 723 @Override 724 void encode(final byte[] in, int inPos, final int inAvail, final Context context) { 725 if (context.eof) { 726 return; 727 } 728 // inAvail < 0 is how we're informed of EOF in the underlying data we're 729 // encoding. 730 if (inAvail < 0) { 731 context.eof = true; 732 if (0 == context.modulus && lineLength == 0) { 733 return; // no leftovers to process and not using chunking 734 } 735 final byte[] buffer = ensureBufferSize(encodeSize, context); 736 final int savedPos = context.pos; 737 switch (context.modulus) { // 0-2 738 case 0 : // nothing to do here 739 break; 740 case 1 : // 8 bits = 6 + 2 741 // top 6 bits: 742 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6BITS]; 743 // remaining 2: 744 buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6BITS]; 745 // URL-SAFE skips the padding to further reduce size. 746 if (encodeTable == STANDARD_ENCODE_TABLE) { 747 buffer[context.pos++] = pad; 748 buffer[context.pos++] = pad; 749 } 750 break; 751 752 case 2 : // 16 bits = 6 + 6 + 4 753 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6BITS]; 754 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6BITS]; 755 buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6BITS]; 756 // URL-SAFE skips the padding to further reduce size. 757 if (encodeTable == STANDARD_ENCODE_TABLE) { 758 buffer[context.pos++] = pad; 759 } 760 break; 761 default: 762 throw new IllegalStateException("Impossible modulus " + context.modulus); 763 } 764 context.currentLinePos += context.pos - savedPos; // keep track of current line position 765 // if currentPos == 0 we are at the start of a line, so don't add CRLF 766 if (lineLength > 0 && context.currentLinePos > 0) { 767 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 768 context.pos += lineSeparator.length; 769 } 770 } else { 771 for (int i = 0; i < inAvail; i++) { 772 final byte[] buffer = ensureBufferSize(encodeSize, context); 773 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; 774 int b = in[inPos++]; 775 if (b < 0) { 776 b += 256; 777 } 778 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE 779 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract 780 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6BITS]; 781 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6BITS]; 782 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6BITS]; 783 buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; 784 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 785 if (lineLength > 0 && lineLength <= context.currentLinePos) { 786 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 787 context.pos += lineSeparator.length; 788 context.currentLinePos = 0; 789 } 790 } 791 } 792 } 793 } 794 795 /** 796 * Returns whether or not the {@code octet} is in the Base64 alphabet. 797 * 798 * @param octet 799 * The value to test 800 * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise. 801 */ 802 @Override 803 protected boolean isInAlphabet(final byte octet) { 804 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 805 } 806 807 /** 808 * Returns our current encode mode. True if we're URL-SAFE, false otherwise. 809 * 810 * @return true if we're in URL-SAFE mode, false otherwise. 811 * @since 1.4 812 */ 813 public boolean isUrlSafe() { 814 return this.encodeTable == URL_SAFE_ENCODE_TABLE; 815 } 816 817 /** 818 * Validates whether decoding the final trailing character is possible in the context 819 * of the set of possible base 64 values. 820 * <p> 821 * The character is valid if the lower bits within the provided mask are zero. This 822 * is used to test the final trailing base-64 digit is zero in the bits that will be discarded. 823 * </p> 824 * 825 * @param emptyBitsMask The mask of the lower bits that should be empty 826 * @param context the context to be used 827 * 828 * @throws IllegalArgumentException if the bits being checked contain any non-zero value 829 */ 830 private void validateCharacter(final int emptyBitsMask, final Context context) { 831 if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) { 832 throw new IllegalArgumentException( 833 "Strict decoding: Last encoded character (before the paddings if any) is a valid " + 834 "base 64 alphabet but not a possible encoding. " + 835 "Expected the discarded bits from the character to be zero."); 836 } 837 } 838 839 /** 840 * Validates whether decoding allows an entire final trailing character that cannot be 841 * used for a complete byte. 842 * 843 * @throws IllegalArgumentException if strict decoding is enabled 844 */ 845 private void validateTrailingCharacter() { 846 if (isStrictDecoding()) { 847 throw new IllegalArgumentException( 848 "Strict decoding: Last encoded character (before the paddings if any) is a valid " + 849 "base 64 alphabet but not a possible encoding. " + 850 "Decoding requires at least two trailing 6-bit characters to create bytes."); 851 } 852 } 853 854 }