001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 /** 021 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. 022 * 023 * <p> 024 * The class can be parameterized in the following manner with various constructors: 025 * <ul> 026 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> 027 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 028 * 8 in the encoded data. 029 * <li>Line separator: Default is CRLF ("\r\n")</li> 030 * </ul> 031 * </p> 032 * <p> 033 * This class operates directly on byte streams, and not character streams. 034 * </p> 035 * <p> 036 * This class is thread-safe. 037 * </p> 038 * 039 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> 040 * 041 * @since 1.5 042 * @version $Id: Base32.html 889935 2013-12-11 05:05:13Z ggregory $ 043 */ 044 public class Base32 extends BaseNCodec { 045 046 /** 047 * BASE32 characters are 5 bits in length. 048 * They are formed by taking a block of five octets to form a 40-bit string, 049 * which is converted into eight BASE32 characters. 050 */ 051 private static final int BITS_PER_ENCODED_BYTE = 5; 052 private static final int BYTES_PER_ENCODED_BLOCK = 8; 053 private static final int BYTES_PER_UNENCODED_BLOCK = 5; 054 055 /** 056 * Chunk separator per RFC 2045 section 2.1. 057 * 058 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 059 */ 060 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 061 062 /** 063 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified 064 * in Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32 065 * alphabet but fall within the bounds of the array are translated to -1. 066 */ 067 private static final byte[] DECODE_TABLE = { 068 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 069 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 070 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 071 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 072 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 073 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-N 074 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a O-Z 075 }; 076 077 /** 078 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" 079 * equivalents as specified in Table 3 of RFC 2045. 080 */ 081 private static final byte[] ENCODE_TABLE = { 082 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 083 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 084 '2', '3', '4', '5', '6', '7', 085 }; 086 087 /** 088 * This array is a lookup table that translates Unicode characters drawn from the "Base32 |Hex Alphabet" (as 089 * specified in Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the 090 * Base32 Hex alphabet but fall within the bounds of the array are translated to -1. 091 */ 092 private static final byte[] HEX_DECODE_TABLE = { 093 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 094 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 095 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 096 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 097 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 098 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-N 099 25, 26, 27, 28, 29, 30, 31, 32, // 50-57 O-V 100 }; 101 102 /** 103 * This array is a lookup table that translates 5-bit positive integer index values into their 104 * "Base32 Hex Alphabet" equivalents as specified in Table 3 of RFC 2045. 105 */ 106 private static final byte[] HEX_ENCODE_TABLE = { 107 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 108 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 109 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 110 }; 111 112 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ 113 private static final int MASK_5BITS = 0x1f; 114 115 // The static final fields above are used for the original static byte[] methods on Base32. 116 // The private member fields below are used with the new streaming approach, which requires 117 // some state be preserved between calls of encode() and decode(). 118 119 /** 120 * Place holder for the bytes we're dealing with for our based logic. 121 * Bitwise operations store and extract the encoding or decoding from this variable. 122 */ 123 124 /** 125 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 126 * <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> 127 */ 128 private final int decodeSize; 129 130 /** 131 * Decode table to use. 132 */ 133 private final byte[] decodeTable; 134 135 /** 136 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 137 * <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> 138 */ 139 private final int encodeSize; 140 141 /** 142 * Encode table to use. 143 */ 144 private final byte[] encodeTable; 145 146 /** 147 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 148 */ 149 private final byte[] lineSeparator; 150 151 /** 152 * Creates a Base32 codec used for decoding and encoding. 153 * <p> 154 * When encoding the line length is 0 (no chunking). 155 * </p> 156 * 157 */ 158 public Base32() { 159 this(false); 160 } 161 162 /** 163 * Creates a Base32 codec used for decoding and encoding. 164 * <p> 165 * When encoding the line length is 0 (no chunking). 166 * </p> 167 * @param useHex if {@code true} then use Base32 Hex alphabet 168 */ 169 public Base32(boolean useHex) { 170 this(0, null, useHex); 171 } 172 173 /** 174 * Creates a Base32 codec used for decoding and encoding. 175 * <p> 176 * When encoding the line length is given in the constructor, the line separator is CRLF. 177 * </p> 178 * 179 * @param lineLength 180 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 181 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 182 * decoding. 183 */ 184 public Base32(int lineLength) { 185 this(lineLength, CHUNK_SEPARATOR); 186 } 187 188 /** 189 * Creates a Base32 codec used for decoding and encoding. 190 * <p> 191 * When encoding the line length and line separator are given in the constructor. 192 * </p> 193 * <p> 194 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 195 * </p> 196 * 197 * @param lineLength 198 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 199 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 200 * decoding. 201 * @param lineSeparator 202 * Each line of encoded data will end with this sequence of bytes. 203 * @throws IllegalArgumentException 204 * The provided lineSeparator included some Base32 characters. That's not going to work! 205 */ 206 public Base32(int lineLength, byte[] lineSeparator) { 207 this(lineLength, lineSeparator, false); 208 } 209 210 /** 211 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. 212 * <p> 213 * When encoding the line length and line separator are given in the constructor. 214 * </p> 215 * <p> 216 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 217 * </p> 218 * 219 * @param lineLength 220 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 221 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 222 * decoding. 223 * @param lineSeparator 224 * Each line of encoded data will end with this sequence of bytes. 225 * @param useHex 226 * if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet 227 * @throws IllegalArgumentException 228 * The provided lineSeparator included some Base32 characters. That's not going to work! Or the 229 * lineLength > 0 and lineSeparator is null. 230 */ 231 public Base32(int lineLength, byte[] lineSeparator, boolean useHex) { 232 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 233 lineLength, 234 lineSeparator == null ? 0 : lineSeparator.length); 235 if (useHex){ 236 this.encodeTable = HEX_ENCODE_TABLE; 237 this.decodeTable = HEX_DECODE_TABLE; 238 } else { 239 this.encodeTable = ENCODE_TABLE; 240 this.decodeTable = DECODE_TABLE; 241 } 242 if (lineLength > 0) { 243 if (lineSeparator == null) { 244 throw new IllegalArgumentException("lineLength "+lineLength+" > 0, but lineSeparator is null"); 245 } 246 // Must be done after initializing the tables 247 if (containsAlphabetOrPad(lineSeparator)) { 248 String sep = StringUtils.newStringUtf8(lineSeparator); 249 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); 250 } 251 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 252 this.lineSeparator = new byte[lineSeparator.length]; 253 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 254 } else { 255 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 256 this.lineSeparator = null; 257 } 258 this.decodeSize = this.encodeSize - 1; 259 } 260 261 /** 262 * <p> 263 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 264 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 265 * call is not necessary when decoding, but it doesn't hurt, either. 266 * </p> 267 * <p> 268 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 269 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 270 * garbage-out philosophy: it will not check the provided data for validity. 271 * </p> 272 * 273 * @param in 274 * byte[] array of ascii data to Base32 decode. 275 * @param inPos 276 * Position to start reading data from. 277 * @param inAvail 278 * Amount of bytes available from input for encoding. 279 * @param context the context to be used 280 * 281 * Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position 282 */ 283 @Override 284 void decode(byte[] in, int inPos, int inAvail, Context context) { // package protected for access from I/O streams 285 if (context.eof) { 286 return; 287 } 288 if (inAvail < 0) { 289 context.eof = true; 290 } 291 for (int i = 0; i < inAvail; i++) { 292 final byte b = in[inPos++]; 293 if (b == PAD) { 294 // We're done. 295 context.eof = true; 296 break; 297 } else { 298 final byte[] buffer = ensureBufferSize(decodeSize, context); 299 if (b >= 0 && b < this.decodeTable.length) { 300 final int result = this.decodeTable[b]; 301 if (result >= 0) { 302 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; 303 // collect decoded bytes 304 context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result; 305 if (context.modulus == 0) { // we can output the 5 bytes 306 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS); 307 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); 308 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 309 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 310 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 311 } 312 } 313 } 314 } 315 } 316 317 // Two forms of EOF as far as Base32 decoder is concerned: actual 318 // EOF (-1) and first time '=' character is encountered in stream. 319 // This approach makes the '=' padding characters completely optional. 320 if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do 321 final byte[] buffer = ensureBufferSize(decodeSize, context); 322 323 // we ignore partial bytes, i.e. only multiples of 8 count 324 switch (context.modulus) { 325 case 2 : // 10 bits, drop 2 and output one byte 326 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS); 327 break; 328 case 3 : // 15 bits, drop 7 and output 1 byte 329 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS); 330 break; 331 case 4 : // 20 bits = 2*8 + 4 332 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits 333 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 334 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 335 break; 336 case 5 : // 25bits = 3*8 + 1 337 context.lbitWorkArea = context.lbitWorkArea >> 1; 338 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 339 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 340 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 341 break; 342 case 6 : // 30bits = 3*8 + 6 343 context.lbitWorkArea = context.lbitWorkArea >> 6; 344 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 345 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 346 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 347 break; 348 case 7 : // 35 = 4*8 +3 349 context.lbitWorkArea = context.lbitWorkArea >> 3; 350 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); 351 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 352 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 353 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 354 break; 355 default: 356 // modulus can be 0-7, and we excluded 0,1 already 357 throw new IllegalStateException("Impossible modulus "+context.modulus); 358 } 359 } 360 } 361 362 /** 363 * <p> 364 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 365 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 366 * remaining bytes (if not multiple of 5). 367 * </p> 368 * 369 * @param in 370 * byte[] array of binary data to Base32 encode. 371 * @param inPos 372 * Position to start reading data from. 373 * @param inAvail 374 * Amount of bytes available from input for encoding. 375 * @param context the context to be used 376 */ 377 @Override 378 void encode(byte[] in, int inPos, int inAvail, Context context) { // package protected for access from I/O streams 379 if (context.eof) { 380 return; 381 } 382 // inAvail < 0 is how we're informed of EOF in the underlying data we're 383 // encoding. 384 if (inAvail < 0) { 385 context.eof = true; 386 if (0 == context.modulus && lineLength == 0) { 387 return; // no leftovers to process and not using chunking 388 } 389 final byte[] buffer = ensureBufferSize(encodeSize, context); 390 final int savedPos = context.pos; 391 switch (context.modulus) { // % 5 392 case 0 : 393 break; 394 case 1 : // Only 1 octet; take top 5 bits then remainder 395 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 396 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2 397 buffer[context.pos++] = PAD; 398 buffer[context.pos++] = PAD; 399 buffer[context.pos++] = PAD; 400 buffer[context.pos++] = PAD; 401 buffer[context.pos++] = PAD; 402 buffer[context.pos++] = PAD; 403 break; 404 case 2 : // 2 octets = 16 bits to use 405 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 406 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 407 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 408 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 409 buffer[context.pos++] = PAD; 410 buffer[context.pos++] = PAD; 411 buffer[context.pos++] = PAD; 412 buffer[context.pos++] = PAD; 413 break; 414 case 3 : // 3 octets = 24 bits to use 415 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 416 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 417 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 418 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 419 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 420 buffer[context.pos++] = PAD; 421 buffer[context.pos++] = PAD; 422 buffer[context.pos++] = PAD; 423 break; 424 case 4 : // 4 octets = 32 bits to use 425 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 426 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 427 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 428 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 429 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 430 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 431 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 432 buffer[context.pos++] = PAD; 433 break; 434 default: 435 throw new IllegalStateException("Impossible modulus "+context.modulus); 436 } 437 context.currentLinePos += context.pos - savedPos; // keep track of current line position 438 // if currentPos == 0 we are at the start of a line, so don't add CRLF 439 if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required 440 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 441 context.pos += lineSeparator.length; 442 } 443 } else { 444 for (int i = 0; i < inAvail; i++) { 445 final byte[] buffer = ensureBufferSize(encodeSize, context); 446 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; 447 int b = in[inPos++]; 448 if (b < 0) { 449 b += 256; 450 } 451 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE 452 if (0 == context.modulus) { // we have enough bytes to create our output 453 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS]; 454 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS]; 455 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS]; 456 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS]; 457 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS]; 458 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS]; 459 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS]; 460 buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS]; 461 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 462 if (lineLength > 0 && lineLength <= context.currentLinePos) { 463 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 464 context.pos += lineSeparator.length; 465 context.currentLinePos = 0; 466 } 467 } 468 } 469 } 470 } 471 472 /** 473 * Returns whether or not the <code>octet</code> is in the Base32 alphabet. 474 * 475 * @param octet 476 * The value to test 477 * @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise. 478 */ 479 @Override 480 public boolean isInAlphabet(byte octet) { 481 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 482 } 483 }