/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Objects;

import org.apache.commons.codec.CodecPolicy;

/**
 * Provides Base64 encoding and decoding as defined by <a href="https://www.ietf.org/rfc/rfc2045">RFC 2045 Multipurpose Internet Mail Extensions (MIME) Part
 * One: Format of Internet Message Bodies</a> and portions of <a href="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648 The Base16, Base32, and Base64
 * Data Encodings</a>
 *
 * <p>
 * This class implements <a href="https://www.ietf.org/rfc/rfc2045#section-6.8">RFC 2045 6.8. Base64 Content-Transfer-Encoding</a>.
 * </p>
 * <p>
 * The class can be parameterized in the following manner with its {@link Builder}:
 * </p>
 * <ul>
 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.</li>
040 * <li>Line separator: Default is CRLF ({@code "\r\n"})</li> 041 * <li>Strict or lenient decoding policy; default is {@link CodecPolicy#LENIENT}.</li> 042 * <li>Custom decoding table.</li> 043 * <li>Custom encoding table.</li> 044 * <li>Padding; defaults is {@code '='}.</li> 045 * </ul> 046 * <p> 047 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes, see also 048 * {@code Builder#setDecodeTableFormat(DecodeTableFormat)}. 049 * </p> 050 * <p> 051 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode character encodings which are 052 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). 053 * </p> 054 * <p> 055 * This class is thread-safe. 056 * </p> 057 * <p> 058 * To configure a new instance, use a {@link Builder}. For example: 059 * </p> 060 * 061 * <pre> 062 * Base64 base64 = Base64.builder() 063 * .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default 064 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default 065 * .setLineLength(0) // default is none 066 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default 067 * .setPadding('=') // default is '=' 068 * .setUrlSafe(false) // default is false 069 * .get() 070 * </pre> 071 * 072 * @see Base64InputStream 073 * @see Base64OutputStream 074 * @see <a href="https://www.ietf.org/rfc/rfc2045">RFC 2045 Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</a> 075 * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648 The Base16, Base32, and Base64 Data Encodings</a> 076 * @since 1.0 077 */ 078public class Base64 extends BaseNCodec { 079 080 /** 081 * Builds {@link Base64} instances. 082 * 083 * <p> 084 * To configure a new instance, use a {@link Builder}. 
For example: 085 * </p> 086 * 087 * <pre> 088 * Base64 base64 = Base64.builder() 089 * .setCodecPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default 090 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default 091 * .setLineLength(0) // default is none 092 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default 093 * .setPadding('=') // default is '=' 094 * .setUrlSafe(false) // default is false 095 * .get() 096 * </pre> 097 * 098 * @since 1.17.0 099 */ 100 public static class Builder extends AbstractBuilder<Base64, Builder> { 101 102 /** 103 * Constructs a new instance. 104 */ 105 public Builder() { 106 super(STANDARD_ENCODE_TABLE); 107 setDecodeTableRaw(DECODE_TABLE); 108 setEncodeTableRaw(STANDARD_ENCODE_TABLE); 109 setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK); 110 setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK); 111 } 112 113 @Override 114 public Base64 get() { 115 return new Base64(this); 116 } 117 118 /** 119 * Sets the format of the decoding table. This method allows to explicitly state whether a standard or URL-safe Base64 decoding is expected. This method 120 * does not modify behavior on encoding operations. For configuration of the encoding behavior, please use {@link #setUrlSafe(boolean)} method. 121 * <p> 122 * By default, the implementation uses the {@link DecodeTableFormat#MIXED} approach, allowing a seamless handling of both 123 * {@link DecodeTableFormat#URL_SAFE} and {@link DecodeTableFormat#STANDARD} base64. 124 * </p> 125 * 126 * @param format table format to be used on Base64 decoding. Use {@link DecodeTableFormat#MIXED} or null to reset to the default behavior. 127 * @return {@code this} instance. 
128 * @since 1.21 129 */ 130 public Builder setDecodeTableFormat(final DecodeTableFormat format) { 131 if (format == null) { 132 return setDecodeTableRaw(DECODE_TABLE); 133 } 134 switch (format) { 135 case STANDARD: 136 return setDecodeTableRaw(STANDARD_DECODE_TABLE); 137 case URL_SAFE: 138 return setDecodeTableRaw(URL_SAFE_DECODE_TABLE); 139 case MIXED: 140 default: 141 return setDecodeTableRaw(DECODE_TABLE); 142 } 143 } 144 145 @Override 146 public Builder setEncodeTable(final byte... encodeTable) { 147 final boolean isStandardEncodeTable = Arrays.equals(encodeTable, STANDARD_ENCODE_TABLE); 148 final boolean isUrlSafe = Arrays.equals(encodeTable, URL_SAFE_ENCODE_TABLE); 149 setDecodeTableRaw(isStandardEncodeTable || isUrlSafe ? DECODE_TABLE : calculateDecodeTable(encodeTable)); 150 return super.setEncodeTable(encodeTable); 151 } 152 153 /** 154 * Sets the URL-safe encoding policy. 155 * <p> 156 * This method does not modify behavior on decoding operations. For configuration of the decoding behavior, please use 157 * {@code Builder.setDecodeTableFormat(DecodeTableFormat)} method. 158 * </p> 159 * 160 * @param urlSafe URL-safe encoding policy, null resets to the default. 161 * @return {@code this} instance. 162 */ 163 public Builder setUrlSafe(final boolean urlSafe) { 164 // Javadoc 8 can't find {@link #setDecodeTableFormat(DecodeTableFormat)} 165 return setEncodeTable(toUrlSafeEncodeTable(urlSafe)); 166 } 167 168 } 169 170 /** 171 * Defines the Base64 table format to be used on decoding. 172 * <p> 173 * By default, the method uses {@link DecodeTableFormat#MIXED} approach, allowing a seamless handling of both {@link DecodeTableFormat#URL_SAFE} and 174 * {@link DecodeTableFormat#STANDARD} base64 options. 
175 * </p> 176 * 177 * @since 1.21 178 */ 179 public enum DecodeTableFormat { 180 181 /** 182 * Corresponds to the standard Base64 coding table, as specified in 183 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>. 184 */ 185 STANDARD, 186 187 /** 188 * Corresponds to the URL-safe Base64 coding table, as specified in 189 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 190 * 4648 Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. 191 */ 192 URL_SAFE, 193 194 /** 195 * Represents a joint approach, allowing a seamless decoding of both character sets, corresponding to either 196 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a> or 197 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 198 * 4648 Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This decoding table is used by default. 199 */ 200 MIXED 201 } 202 203 /** 204 * BASE64 characters are 6 bits in length. 205 * They are formed by taking a block of 3 octets to form a 24-bit string, 206 * which is converted into 4 BASE64 characters. 207 */ 208 private static final int BITS_PER_ENCODED_BYTE = 6; 209 private static final int BYTES_PER_UNENCODED_BLOCK = 3; 210 private static final int BYTES_PER_ENCODED_BLOCK = 4; 211 private static final int DECODING_TABLE_LENGTH = 256; 212 213 /** 214 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" equivalents as specified in 215 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>. 216 * <p> 217 * Thanks to "commons" project in ws.apache.org for this code. 
https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 218 * </p> 219 */ 220 // @formatter:off 221 private static final byte[] STANDARD_ENCODE_TABLE = { 222 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 223 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 224 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 225 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 226 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' 227 }; 228 229 /** 230 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / changed to - and _ to make the encoded Base64 results more URL-SAFE. This table is 231 * only used when the Base64's mode is set to URL-SAFE. 232 */ 233 // @formatter:off 234 private static final byte[] URL_SAFE_ENCODE_TABLE = { 235 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 236 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 237 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 238 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 239 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' 240 }; 241 // @formatter:on 242 243 /** 244 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in 245 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>) into their 6-bit 246 * positive integer equivalents. Characters that are not in the Base64 or Base64 URL-safe alphabets but fall within the bounds of the array are translated 247 * to -1. 248 * <p> 249 * The characters '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both URL_SAFE and STANDARD base64. 250 * (The encoder, on the other hand, needs to know ahead of time what to emit). 251 * </p> 252 * <p> 253 * Thanks to "commons" project in ws.apache.org for this code. 
https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 254 * </p> 255 */ 256 private static final byte[] DECODE_TABLE = { 257 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 258 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 259 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 260 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - / 261 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 262 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O 263 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _ 264 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o 265 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z 266 }; 267 268 /** 269 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in 270 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>) into their 6-bit 271 * positive integer equivalents. Characters that are not in the Base64 alphabet but fall within the bounds of the array are translated to -1. This decoding 272 * table handles only the standard base64 characters, such as '+' and '/'. The "url-safe" characters such as '-' and '_' are not supported by the table. 
273 */ 274 private static final byte[] STANDARD_DECODE_TABLE = { 275 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 276 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 277 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 278 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 20-2f + / 279 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 280 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O 281 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50-5f P-Z 282 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o 283 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z 284 }; 285 286 /** 287 * This array is a lookup table that translates Unicode characters drawn from the "Base64 URL-safe Alphabet" (as specified in 288 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 289 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>) into their 6-bit positive integer equivalents. Characters that are not in the Base64 URL-safe 290 * alphabet but fall within the bounds of the array are translated to -1. This decoding table handles only the URL-safe base64 characters, such as '-' and 291 * '_'. The standard characters such as '+' and '/' are not supported by the table. 
292 */ 293 private static final byte[] URL_SAFE_DECODE_TABLE = { 294 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 295 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 296 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 297 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, // 20-2f - 298 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 299 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O 300 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _ 301 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o 302 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z 303 }; 304 305 /** 306 * Base64 uses 6-bit fields. 307 */ 308 309 /** Mask used to extract 6 bits, used when encoding */ 310 private static final int MASK_6_BITS = 0x3f; 311 312 // The static final fields above are used for the original static byte[] methods on Base64. 313 // The private member fields below are used with the new streaming approach, which requires 314 // some state be preserved between calls of encode() and decode(). 315 316 /** Mask used to extract 4 bits, used when decoding final trailing character. */ 317 private static final int MASK_4_BITS = 0xf; 318 319 /** Mask used to extract 2 bits, used when decoding final trailing character. */ 320 private static final int MASK_2_BITS = 0x3; 321 322 /** 323 * Creates a new Builder. 324 * 325 * <p> 326 * To configure a new instance, use a {@link Builder}. 
For example: 327 * </p> 328 * 329 * <pre> 330 * Base64 base64 = Base64.builder() 331 * .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default 332 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default 333 * .setLineLength(0) // default is none 334 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default 335 * .setPadding('=') // default is '=' 336 * .setUrlSafe(false) // default is false 337 * .get() 338 * </pre> 339 * 340 * @return a new Builder. 341 * @since 1.17.0 342 */ 343 public static Builder builder() { 344 return new Builder(); 345 } 346 347 /** 348 * Calculates a decode table for a given encode table. 349 * 350 * @param encodeTable that is used to determine decode lookup table. 351 * @return A new decode table. 352 */ 353 private static byte[] calculateDecodeTable(final byte[] encodeTable) { 354 final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH]; 355 Arrays.fill(decodeTable, (byte) -1); 356 for (int i = 0; i < encodeTable.length; i++) { 357 decodeTable[encodeTable[i]] = (byte) i; 358 } 359 return decodeTable; 360 } 361 362 /** 363 * Decodes Base64 data into octets. 364 * <p> 365 * This method seamlessly handles data encoded in URL-safe or normal mode. For enforcing verification against strict standard Base64 or Base64 URL-safe 366 * tables, please use {@link #decodeBase64Standard(byte[])} or {@link #decodeBase64UrlSafe(byte[])} methods respectively. This method skips unknown or 367 * unsupported bytes. 368 * </p> 369 * 370 * @param base64Data Byte array containing Base64 data. 371 * @return New array containing decoded data. 372 */ 373 public static byte[] decodeBase64(final byte[] base64Data) { 374 return new Base64().decode(base64Data); 375 } 376 377 /** 378 * Decodes a Base64 String into octets. 379 * <p> 380 * This method seamlessly handles data encoded in URL-safe or normal mode. 
For enforcing verification against strict standard Base64 or Base64 URL-safe 381 * tables, please use {@link #decodeBase64Standard(String)} or {@link #decodeBase64UrlSafe(String)} methods respectively. This method skips unknown or 382 * unsupported bytes. 383 * </p> 384 * 385 * @param base64String String containing Base64 data. 386 * @return New array containing decoded data. 387 * @since 1.4 388 */ 389 public static byte[] decodeBase64(final String base64String) { 390 return new Base64().decode(base64String); 391 } 392 393 /** 394 * Decodes standard Base64 data into octets. 395 * <p> 396 * This implementation is aligned with the <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The 397 * Base64 Alphabet</a>. This method skips unknown or unsupported bytes. 398 * </p> 399 * 400 * @param base64Data Byte array containing Base64 data. 401 * @return New array containing decoded data. 402 * @since 1.21 403 */ 404 public static byte[] decodeBase64Standard(final byte[] base64Data) { 405 return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64Data); 406 } 407 408 /** 409 * Decodes a standard Base64 String into octets. 410 * <p> 411 * This implementation is aligned with the <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The 412 * Base64 Alphabet</a>. This method skips unknown or unsupported characters. 413 * </p> 414 * 415 * @param base64String String containing Base64 data. 416 * @return New array containing decoded data. 417 * @since 1.21 418 */ 419 public static byte[] decodeBase64Standard(final String base64String) { 420 return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64String); 421 } 422 423 /** 424 * Decodes URL-safe Base64 data into octets. 
425 * <p> 426 * This implementation is aligned with 427 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 428 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This method skips unknown or unsupported characters. 429 * </p> 430 * 431 * @param base64Data Byte array containing Base64 data. 432 * @return New array containing decoded data. 433 * @since 1.21 434 */ 435 public static byte[] decodeBase64UrlSafe(final byte[] base64Data) { 436 return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64Data); 437 } 438 439 /** 440 * Decodes a URL-safe Base64 String into octets. 441 * <p> 442 * This implementation is aligned with 443 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 444 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This method skips unknown or unsupported characters. 445 * </p> 446 * 447 * @param base64String String containing Base64 data. 448 * @return New array containing decoded data. 449 * @since 1.21 450 */ 451 public static byte[] decodeBase64UrlSafe(final String base64String) { 452 return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64String); 453 } 454 455 /** 456 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. 457 * 458 * @param array a byte array containing base64 character data. 459 * @return A BigInteger. 460 * @since 1.4 461 */ 462 public static BigInteger decodeInteger(final byte[] array) { 463 return new BigInteger(1, decodeBase64(array)); 464 } 465 466 /** 467 * Encodes binary data using the base64 algorithm but does not chunk the output. 468 * 469 * @param binaryData binary data to encode. 470 * @return byte[] containing Base64 characters in their UTF-8 representation. 
471 */ 472 public static byte[] encodeBase64(final byte[] binaryData) { 473 return encodeBase64(binaryData, false); 474 } 475 476 /** 477 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 478 * 479 * @param binaryData Array containing binary data to encode. 480 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks. 481 * @return Base64-encoded data. 482 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}. 483 */ 484 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) { 485 return encodeBase64(binaryData, isChunked, false); 486 } 487 488 /** 489 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 490 * 491 * @param binaryData Array containing binary data to encode. 492 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks. 493 * @param urlSafe if {@code true} this encoder will emit - and _ instead of the usual + and / characters. <strong>No padding is added when encoding using 494 * the URL-safe alphabet.</strong> 495 * @return Base64-encoded data. 496 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}. 497 * @since 1.4 498 */ 499 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) { 500 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); 501 } 502 503 /** 504 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 505 * 506 * @param binaryData Array containing binary data to encode. 507 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks. 
508 * @param urlSafe if {@code true} this encoder will emit - and _ instead of the usual + and / characters. <strong>No padding is added when encoding 509 * using the URL-safe alphabet.</strong> 510 * @param maxResultSize The maximum result size to accept. 511 * @return Base64-encoded data. 512 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than maxResultSize. 513 * @since 1.4 514 */ 515 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe, final int maxResultSize) { 516 if (BinaryCodec.isEmpty(binaryData)) { 517 return binaryData; 518 } 519 // Create this so can use the super-class method 520 // Also ensures that the same roundings are performed by the ctor and the code 521 final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); 522 final long len = b64.getEncodedLength(binaryData); 523 if (len > maxResultSize) { 524 throw new IllegalArgumentException( 525 "Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize); 526 } 527 return b64.encode(binaryData); 528 } 529 530 /** 531 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks 532 * 533 * @param binaryData binary data to encode. 534 * @return Base64 characters chunked in 76 character blocks. 535 */ 536 public static byte[] encodeBase64Chunked(final byte[] binaryData) { 537 return encodeBase64(binaryData, true); 538 } 539 540 /** 541 * Encodes binary data using the base64 algorithm but does not chunk the output. 542 * <p> 543 * <strong> We changed the behavior of this method from multi-line chunking (1.4) to single-line non-chunking (1.5).</strong> 544 * </p> 545 * 546 * @param binaryData binary data to encode. 547 * @return String containing Base64 characters. 548 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not). 
549 */ 550 public static String encodeBase64String(final byte[] binaryData) { 551 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false)); 552 } 553 554 /** 555 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The url-safe variation emits - and _ instead of + 556 * and / characters. <strong>No padding is added.</strong> 557 * 558 * @param binaryData binary data to encode. 559 * @return byte[] containing Base64 characters in their UTF-8 representation. 560 * @since 1.4 561 */ 562 public static byte[] encodeBase64URLSafe(final byte[] binaryData) { 563 return encodeBase64(binaryData, false, true); 564 } 565 566 /** 567 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The url-safe variation emits - and _ instead of + 568 * and / characters. <strong>No padding is added.</strong> 569 * 570 * @param binaryData binary data to encode. 571 * @return String containing Base64 characters. 572 * @since 1.4 573 */ 574 public static String encodeBase64URLSafeString(final byte[] binaryData) { 575 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true)); 576 } 577 578 /** 579 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. 580 * 581 * @param bigInteger a BigInteger. 582 * @return A byte array containing base64 character data. 583 * @throws NullPointerException if null is passed in. 584 * @since 1.4 585 */ 586 public static byte[] encodeInteger(final BigInteger bigInteger) { 587 Objects.requireNonNull(bigInteger, "bigInteger"); 588 return encodeBase64(toIntegerBytes(bigInteger), false); 589 } 590 591 /** 592 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid. 593 * 594 * @param arrayOctet byte array to test. 
595 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; {@code false}, otherwise. 596 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0. 597 */ 598 @Deprecated 599 public static boolean isArrayByteBase64(final byte[] arrayOctet) { 600 return isBase64(arrayOctet); 601 } 602 603 /** 604 * Tests whether or not the {@code octet} is in the Base64 alphabet. 605 * <p> 606 * This method threats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/' 607 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe 608 * tables, please use {@link #isBase64Standard(byte)} or {@link #isBase64Url(byte)} methods respectively. 609 * </p> 610 * 611 * @param octet The value to test. 612 * @return {@code true} if the value is defined in the Base64 alphabet, {@code false} otherwise. 613 * @since 1.4 614 */ 615 public static boolean isBase64(final byte octet) { 616 return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1; 617 } 618 619 /** 620 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid. 621 * <p> 622 * This method treats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/' 623 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe 624 * tables, please use {@link #isBase64Standard(byte[])} or {@link #isBase64Url(byte[])} methods respectively. 625 * </p> 626 * 627 * @param arrayOctet byte array to test. 
628 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; {@code false}, otherwise. 629 * @since 1.5 630 */ 631 public static boolean isBase64(final byte[] arrayOctet) { 632 for (final byte element : arrayOctet) { 633 if (!isBase64(element) && !Character.isWhitespace(element)) { 634 return false; 635 } 636 } 637 return true; 638 } 639 640 /** 641 * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid. 642 * <p> 643 * This method threats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/' 644 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe 645 * tables, please use {@link #isBase64Standard(String)} or {@link #isBase64Url(String)} methods respectively. 646 * </p> 647 * 648 * @param base64 String to test. 649 * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if the String is empty; {@code false}, otherwise. 650 * @since 1.5 651 */ 652 public static boolean isBase64(final String base64) { 653 return isBase64(StringUtils.getBytesUtf8(base64)); 654 } 655 656 /** 657 * Tests whether or not the {@code octet} is in the standard Base64 alphabet. 658 * <p> 659 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The 660 * Base64 Alphabet</a>. 661 * </p> 662 * 663 * @param octet The value to test. 664 * @return {@code true} if the value is defined in the standard Base64 alphabet, {@code false} otherwise. 
665 * @since 1.21 666 */ 667 public static boolean isBase64Standard(final byte octet) { 668 return octet == PAD_DEFAULT || octet >= 0 && octet < STANDARD_DECODE_TABLE.length && STANDARD_DECODE_TABLE[octet] != -1; 669 } 670 671 /** 672 * Tests a given byte array to see if it contains only valid characters within the standard Base64 alphabet. The method treats whitespace as valid. 673 * <p> 674 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The 675 * Base64 Alphabet</a>. 676 * </p> 677 * 678 * @param arrayOctet byte array to test. 679 * @return {@code true} if all bytes are valid characters in the standard Base64 alphabet. {@code false}, otherwise. 680 * @since 1.21 681 */ 682 public static boolean isBase64Standard(final byte[] arrayOctet) { 683 for (final byte element : arrayOctet) { 684 if (!isBase64Standard(element) && !Character.isWhitespace(element)) { 685 return false; 686 } 687 } 688 return true; 689 } 690 691 /** 692 * Tests a given String to see if it contains only valid characters within the standard Base64 alphabet. The method treats whitespace as valid. 693 * <p> 694 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The 695 * Base64 Alphabet</a>. 696 * </p> 697 * 698 * @param base64 String to test. 699 * @return {@code true} if all characters in the String are valid characters in the standard Base64 alphabet or if the String is empty; {@code false}, 700 * otherwise. 701 * @since 1.21 702 */ 703 public static boolean isBase64Standard(final String base64) { 704 return isBase64Standard(StringUtils.getBytesUtf8(base64)); 705 } 706 707 /** 708 * Tests whether or not the {@code octet} is in the URL-safe Base64 alphabet. 
709 * <p> 710 * This implementation is aligned with 711 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 712 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. 713 * </p> 714 * 715 * @param octet The value to test. 716 * @return {@code true} if the value is defined in the URL-safe Base64 alphabet, {@code false} otherwise. 717 * @since 1.21 718 */ 719 public static boolean isBase64Url(final byte octet) { 720 return octet == PAD_DEFAULT || octet >= 0 && octet < URL_SAFE_DECODE_TABLE.length && URL_SAFE_DECODE_TABLE[octet] != -1; 721 } 722 723 /** 724 * Tests a given byte array to see if it contains only valid characters within the URL-safe Base64 alphabet. The method treats whitespace as valid. 725 * <p> 726 * This implementation is aligned with 727 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 728 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. 729 * </p> 730 * 731 * @param arrayOctet byte array to test. 732 * @return {@code true} if all bytes are valid characters in the URL-safe Base64 alphabet, {@code false}, otherwise. 733 * @since 1.21 734 */ 735 public static boolean isBase64Url(final byte[] arrayOctet) { 736 for (final byte element : arrayOctet) { 737 if (!isBase64Url(element) && !Character.isWhitespace(element)) { 738 return false; 739 } 740 } 741 return true; 742 } 743 744 /** 745 * Tests a given String to see if it contains only valid characters within the URL-safe Base64 alphabet. The method treats whitespace as valid. 746 * <p> 747 * This implementation is aligned with 748 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648 749 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. 
750 * </p> 751 * 752 * @param base64 String to test. 753 * @return {@code true} if all characters in the String are valid characters in the URL-safe Base64 alphabet or if the String is empty; {@code false}, 754 * otherwise. 755 * @since 1.21 756 */ 757 public static boolean isBase64Url(final String base64) { 758 return isBase64Url(StringUtils.getBytesUtf8(base64)); 759 } 760 761 /** 762 * Returns a byte-array representation of a {@code BigInteger} without sign bit. 763 * 764 * @param bigInt {@code BigInteger} to be converted. 765 * @return a byte array representation of the BigInteger parameter. 766 */ 767 static byte[] toIntegerBytes(final BigInteger bigInt) { 768 int bitlen = bigInt.bitLength(); 769 // round bitlen 770 bitlen = bitlen + 7 >> 3 << 3; 771 final byte[] bigBytes = bigInt.toByteArray(); 772 if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) { 773 return bigBytes; 774 } 775 // set up params for copying everything but sign bit 776 int startSrc = 0; 777 int len = bigBytes.length; 778 // if bigInt is exactly byte-aligned, just skip signbit in copy 779 if (bigInt.bitLength() % 8 == 0) { 780 startSrc = 1; 781 len--; 782 } 783 final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec 784 final byte[] resizedBytes = new byte[bitlen / 8]; 785 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); 786 return resizedBytes; 787 } 788 789 static byte[] toUrlSafeEncodeTable(final boolean urlSafe) { 790 return urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; 791 } 792 793 /** 794 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 795 */ 796 private final byte[] lineSeparator; 797 798 /** 799 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 
* {@code encodeSize = 4 + lineSeparator.length;}
     */
    private final int encodeSize;

    /**
     * {@code true} when the configured encode table equals {@code URL_SAFE_ENCODE_TABLE}; reported by {@link #isUrlSafe()}.
     */
    private final boolean isUrlSafe;

    /**
     * {@code true} when the configured encode table equals {@code STANDARD_ENCODE_TABLE}. Trailing padding is only written while encoding when this is
     * {@code true}.
     */
    private final boolean isStandardEncodeTable;

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     */
    public Base64() {
        this(0);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length is 76, the line separator is CRLF, and the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true},
     * STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param urlSafe if {@code true}, URL-safe encoding is used. In most cases this should be set to {@code false}.
     * @since 1.4
     * @deprecated Use {@link #builder()} and {@link Builder}.
830 */ 831 @Deprecated 832 public Base64(final boolean urlSafe) { 833 this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); 834 } 835 836 private Base64(final Builder builder) { 837 super(builder); 838 final byte[] encTable = builder.getEncodeTable(); 839 if (encTable.length != STANDARD_ENCODE_TABLE.length) { 840 throw new IllegalArgumentException("encodeTable must have exactly 64 entries."); 841 } 842 this.isStandardEncodeTable = Arrays.equals(encTable, STANDARD_ENCODE_TABLE); 843 this.isUrlSafe = Arrays.equals(encTable, URL_SAFE_ENCODE_TABLE); 844 // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0 845 // @see test case Base64Test.testConstructors() 846 if (builder.getLineSeparator().length > 0) { 847 final byte[] lineSeparatorB = builder.getLineSeparator(); 848 if (containsAlphabetOrPad(lineSeparatorB)) { 849 final String sep = StringUtils.newStringUtf8(lineSeparatorB); 850 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]"); 851 } 852 if (builder.getLineLength() > 0) { // null line-sep forces no chunking rather than throwing IAE 853 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparatorB.length; 854 this.lineSeparator = lineSeparatorB; 855 } else { 856 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 857 this.lineSeparator = null; 858 } 859 } else { 860 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 861 this.lineSeparator = null; 862 } 863 } 864 865 /** 866 * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 867 * <p> 868 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 869 * </p> 870 * <p> 871 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 872 * </p> 873 * <p> 874 * When decoding all variants are supported. 
* </p>
     *
     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
     *                   {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
     * @since 1.4
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base64(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is STANDARD_ENCODE_TABLE.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
     *                      {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException Thrown when the provided lineSeparator included some base64 characters.
     * @since 1.4
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base64(final int lineLength, final byte[] lineSeparator) {
        this(lineLength, lineSeparator, false);
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe} is
     * {@code true}, STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
* </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
     *                      {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @param urlSafe       Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode operations. Decoding seamlessly
     *                      handles both modes. <strong>No padding is added when using the URL-safe alphabet.</strong>
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters.
     * @since 1.4
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
        // A null separator is replaced by an empty array before it reaches the builder.
        this(builder().setLineLength(lineLength).setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY).setPadding(PAD_DEFAULT)
                .setEncodeTableRaw(toUrlSafeEncodeTable(urlSafe)).setDecodingPolicy(DECODING_POLICY_DEFAULT));
    }

    /**
     * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
     * <p>
     * When encoding the line length and line separator are given in the constructor, and the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe} is
     * {@code true}, STANDARD_ENCODE_TABLE otherwise.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
     * </p>
     * <p>
     * When decoding all variants are supported.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
     *                      {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
952 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. 953 * @param urlSafe Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode operations. Decoding seamlessly 954 * handles both modes. <strong>No padding is added when using the URL-safe alphabet.</strong> 955 * @param decodingPolicy The decoding policy. 956 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters. 957 * @since 1.15 958 * @deprecated Use {@link #builder()} and {@link Builder}. 959 */ 960 @Deprecated 961 public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, final CodecPolicy decodingPolicy) { 962 this(builder().setLineLength(lineLength).setLineSeparator(lineSeparator).setPadding(PAD_DEFAULT).setEncodeTableRaw(toUrlSafeEncodeTable(urlSafe)) 963 .setDecodingPolicy(decodingPolicy)); 964 } 965 966 /** 967 * <p> 968 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with 969 * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either. 970 * </p> 971 * <p> 972 * Ignores all non-base64 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has 973 * implications for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity. 974 * </p> 975 * <p> 976 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 977 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 978 * </p> 979 * 980 * @param input byte[] array of ASCII data to base64 decode. 981 * @param inPos Position to start reading data from. 982 * @param inAvail Amount of bytes available from input for decoding. 
983 * @param context the context to be used. 984 */ 985 @Override 986 void decode(final byte[] input, int inPos, final int inAvail, final Context context) { 987 if (context.eof) { 988 return; 989 } 990 if (inAvail < 0) { 991 context.eof = true; 992 } 993 final int decodeSize = this.encodeSize - 1; 994 for (int i = 0; i < inAvail; i++) { 995 final byte[] buffer = ensureBufferSize(decodeSize, context); 996 final byte b = input[inPos++]; 997 if (b == pad) { 998 // We're done. 999 context.eof = true; 1000 break; 1001 } 1002 if (b >= 0 && b < decodeTable.length) { 1003 final int result = decodeTable[b]; 1004 if (result >= 0) { 1005 context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; 1006 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; 1007 if (context.modulus == 0) { 1008 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS); 1009 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS); 1010 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 1011 } 1012 } 1013 } 1014 } 1015 1016 // Two forms of EOF as far as base64 decoder is concerned: actual 1017 // EOF (-1) and first time '=' character is encountered in stream. 1018 // This approach makes the '=' padding characters completely optional. 
1019 if (context.eof && context.modulus != 0) { 1020 final byte[] buffer = ensureBufferSize(decodeSize, context); 1021 1022 // We have some spare bits remaining 1023 // Output all whole multiples of 8 bits and ignore the rest 1024 switch (context.modulus) { 1025// case 0 : // impossible, as excluded above 1026 case 1 : // 6 bits - either ignore entirely, or raise an exception 1027 validateTrailingCharacter(); 1028 break; 1029 case 2 : // 12 bits = 8 + 4 1030 validateCharacter(MASK_4_BITS, context); 1031 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits 1032 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 1033 break; 1034 case 3 : // 18 bits = 8 + 8 + 2 1035 validateCharacter(MASK_2_BITS, context); 1036 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits 1037 buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS); 1038 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); 1039 break; 1040 default: 1041 throw new IllegalStateException("Impossible modulus " + context.modulus); 1042 } 1043 } 1044 } 1045 1046 /** 1047 * <p> 1048 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with 1049 * inAvail set to "-1" to alert encoder that EOF has been reached, to flush last remaining bytes (if not multiple of 3). 1050 * </p> 1051 * <p> 1052 * <strong>No padding is added when encoding using the URL-safe alphabet.</strong> 1053 * </p> 1054 * <p> 1055 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 1056 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 1057 * </p> 1058 * 1059 * @param in byte[] array of binary data to base64 encode. 1060 * @param inPos Position to start reading data from. 1061 * @param inAvail Amount of bytes available from input for encoding. 1062 * @param context the context to be used. 
1063 */ 1064 @Override 1065 void encode(final byte[] in, int inPos, final int inAvail, final Context context) { 1066 if (context.eof) { 1067 return; 1068 } 1069 // inAvail < 0 is how we're informed of EOF in the underlying data we're 1070 // encoding. 1071 if (inAvail < 0) { 1072 context.eof = true; 1073 if (0 == context.modulus && lineLength == 0) { 1074 return; // no leftovers to process and not using chunking 1075 } 1076 final byte[] buffer = ensureBufferSize(encodeSize, context); 1077 final int savedPos = context.pos; 1078 switch (context.modulus) { // 0-2 1079 case 0 : // nothing to do here 1080 break; 1081 case 1 : // 8 bits = 6 + 2 1082 // top 6 bits: 1083 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6_BITS]; 1084 // remaining 2: 1085 buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6_BITS]; 1086 // URL-SAFE skips the padding to further reduce size. 1087 if (isStandardEncodeTable) { 1088 buffer[context.pos++] = pad; 1089 buffer[context.pos++] = pad; 1090 } 1091 break; 1092 1093 case 2 : // 16 bits = 6 + 6 + 4 1094 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6_BITS]; 1095 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6_BITS]; 1096 buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6_BITS]; 1097 // URL-SAFE skips the padding to further reduce size. 
1098 if (isStandardEncodeTable) { 1099 buffer[context.pos++] = pad; 1100 } 1101 break; 1102 default: 1103 throw new IllegalStateException("Impossible modulus " + context.modulus); 1104 } 1105 context.currentLinePos += context.pos - savedPos; // keep track of current line position 1106 // if currentPos == 0 we are at the start of a line, so don't add CRLF 1107 if (lineLength > 0 && context.currentLinePos > 0) { 1108 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 1109 context.pos += lineSeparator.length; 1110 } 1111 } else { 1112 for (int i = 0; i < inAvail; i++) { 1113 final byte[] buffer = ensureBufferSize(encodeSize, context); 1114 context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK; 1115 int b = in[inPos++]; 1116 if (b < 0) { 1117 b += 256; 1118 } 1119 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE 1120 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract 1121 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6_BITS]; 1122 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6_BITS]; 1123 buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6_BITS]; 1124 buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6_BITS]; 1125 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 1126 if (lineLength > 0 && lineLength <= context.currentLinePos) { 1127 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 1128 context.pos += lineSeparator.length; 1129 context.currentLinePos = 0; 1130 } 1131 } 1132 } 1133 } 1134 } 1135 1136 /** 1137 * Gets the line separator (for testing only). 1138 * 1139 * @return the line separator. 1140 */ 1141 byte[] getLineSeparator() { 1142 return lineSeparator; 1143 } 1144 1145 /** 1146 * Returns whether or not the {@code octet} is in the Base64 alphabet. 1147 * 1148 * @param octet The value to test. 
1149 * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise. 1150 */ 1151 @Override 1152 protected boolean isInAlphabet(final byte octet) { 1153 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 1154 } 1155 1156 /** 1157 * Returns our current encode mode. True if we're URL-safe, false otherwise. 1158 * 1159 * @return true if we're in URL-safe mode, false otherwise. 1160 * @since 1.4 1161 */ 1162 public boolean isUrlSafe() { 1163 return isUrlSafe; 1164 } 1165 1166 /** 1167 * Validates whether decoding the final trailing character is possible in the context of the set of possible Base64 values. 1168 * <p> 1169 * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-64 digit is zero in the bits 1170 * that will be discarded. 1171 * </p> 1172 * 1173 * @param emptyBitsMask The mask of the lower bits that should be empty. 1174 * @param context the context to be used. 1175 * @throws IllegalArgumentException if the bits being checked contain any non-zero value. 1176 */ 1177 private void validateCharacter(final int emptyBitsMask, final Context context) { 1178 if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) { 1179 throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " + 1180 "Base64 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero."); 1181 } 1182 } 1183 1184 /** 1185 * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte. 1186 * 1187 * @throws IllegalArgumentException if strict decoding is enabled. 1188 */ 1189 private void validateTrailingCharacter() { 1190 if (isStrictDecoding()) { 1191 throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " + 1192 "Base64 alphabet but not a possible encoding. 
Decoding requires at least two trailing 6-bit characters to create bytes."); 1193 } 1194 } 1195 1196}