001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020/** 021 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. 022 * 023 * <p> 024 * The class can be parameterized in the following manner with various constructors: 025 * <ul> 026 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> 027 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 028 * 8 in the encoded data. 029 * <li>Line separator: Default is CRLF ("\r\n")</li> 030 * </ul> 031 * </p> 032 * <p> 033 * This class operates directly on byte streams, and not character streams. 034 * </p> 035 * <p> 036 * This class is thread-safe. 037 * </p> 038 * 039 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> 040 * 041 * @since 1.5 042 * @version $Id: Base32.html 891688 2013-12-24 20:49:46Z ggregory $ 043 */ 044public class Base32 extends BaseNCodec { 045 046 /** 047 * BASE32 characters are 5 bits in length. 048 * They are formed by taking a block of five octets to form a 40-bit string, 049 * which is converted into eight BASE32 characters. 050 */ 051 private static final int BITS_PER_ENCODED_BYTE = 5; 052 private static final int BYTES_PER_ENCODED_BLOCK = 8; 053 private static final int BYTES_PER_UNENCODED_BLOCK = 5; 054 055 /** 056 * Chunk separator per RFC 2045 section 2.1. 057 * 058 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 059 */ 060 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 061 062 /** 063 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified 064 * in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32 065 * alphabet but fall within the bounds of the array are translated to -1. 066 */ 067 private static final byte[] DECODE_TABLE = { 068 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 069 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 070 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 071 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 072 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 073 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-N 074 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a O-Z 075 }; 076 077 /** 078 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" 079 * equivalents as specified in Table 3 of RFC 4648. 080 */ 081 private static final byte[] ENCODE_TABLE = { 082 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 083 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 084 '2', '3', '4', '5', '6', '7', 085 }; 086 087 /** 088 * This array is a lookup table that translates Unicode characters drawn from the "Base32 |Hex Alphabet" (as 089 * specified in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the 090 * Base32 Hex alphabet but fall within the bounds of the array are translated to -1. 091 */ 092 private static final byte[] HEX_DECODE_TABLE = { 093 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 094 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 095 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 096 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f 097 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 098 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-N 099 25, 26, 27, 28, 29, 30, 31, 32, // 50-57 O-V 100 }; 101 102 /** 103 * This array is a lookup table that translates 5-bit positive integer index values into their 104 * "Base32 Hex Alphabet" equivalents as specified in Table 3 of RFC 4648. 105 */ 106 private static final byte[] HEX_ENCODE_TABLE = { 107 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 108 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 109 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 110 }; 111 112 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ 113 private static final int MASK_5BITS = 0x1f; 114 115 // The static final fields above are used for the original static byte[] methods on Base32. 116 // The private member fields below are used with the new streaming approach, which requires 117 // some state be preserved between calls of encode() and decode(). 118 119 /** 120 * Place holder for the bytes we're dealing with for our based logic. 121 * Bitwise operations store and extract the encoding or decoding from this variable. 122 */ 123 124 /** 125 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 126 * <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> 127 */ 128 private final int decodeSize; 129 130 /** 131 * Decode table to use. 132 */ 133 private final byte[] decodeTable; 134 135 /** 136 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 137 * <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> 138 */ 139 private final int encodeSize; 140 141 /** 142 * Encode table to use. 143 */ 144 private final byte[] encodeTable; 145 146 /** 147 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 148 */ 149 private final byte[] lineSeparator; 150 151 /** 152 * Creates a Base32 codec used for decoding and encoding. 153 * <p> 154 * When encoding the line length is 0 (no chunking). 155 * </p> 156 * 157 */ 158 public Base32() { 159 this(false); 160 } 161 162 /** 163 * Creates a Base32 codec used for decoding and encoding. 164 * <p> 165 * When encoding the line length is 0 (no chunking). 166 * </p> 167 * @param useHex if {@code true} then use Base32 Hex alphabet 168 */ 169 public Base32(final boolean useHex) { 170 this(0, null, useHex); 171 } 172 173 /** 174 * Creates a Base32 codec used for decoding and encoding. 175 * <p> 176 * When encoding the line length is given in the constructor, the line separator is CRLF. 177 * </p> 178 * 179 * @param lineLength 180 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 181 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 182 * decoding. 183 */ 184 public Base32(final int lineLength) { 185 this(lineLength, CHUNK_SEPARATOR); 186 } 187 188 /** 189 * Creates a Base32 codec used for decoding and encoding. 190 * <p> 191 * When encoding the line length and line separator are given in the constructor. 192 * </p> 193 * <p> 194 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 195 * </p> 196 * 197 * @param lineLength 198 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 199 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 200 * decoding. 201 * @param lineSeparator 202 * Each line of encoded data will end with this sequence of bytes. 203 * @throws IllegalArgumentException 204 * The provided lineSeparator included some Base32 characters. That's not going to work! 205 */ 206 public Base32(final int lineLength, final byte[] lineSeparator) { 207 this(lineLength, lineSeparator, false); 208 } 209 210 /** 211 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. 212 * <p> 213 * When encoding the line length and line separator are given in the constructor. 214 * </p> 215 * <p> 216 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 217 * </p> 218 * 219 * @param lineLength 220 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 221 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when 222 * decoding. 223 * @param lineSeparator 224 * Each line of encoded data will end with this sequence of bytes. 225 * @param useHex 226 * if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet 227 * @throws IllegalArgumentException 228 * The provided lineSeparator included some Base32 characters. That's not going to work! Or the 229 * lineLength > 0 and lineSeparator is null. 230 */ 231 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) { 232 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 233 lineLength, 234 lineSeparator == null ? 0 : lineSeparator.length); 235 if (useHex){ 236 this.encodeTable = HEX_ENCODE_TABLE; 237 this.decodeTable = HEX_DECODE_TABLE; 238 } else { 239 this.encodeTable = ENCODE_TABLE; 240 this.decodeTable = DECODE_TABLE; 241 } 242 if (lineLength > 0) { 243 if (lineSeparator == null) { 244 throw new IllegalArgumentException("lineLength "+lineLength+" > 0, but lineSeparator is null"); 245 } 246 // Must be done after initializing the tables 247 if (containsAlphabetOrPad(lineSeparator)) { 248 final String sep = StringUtils.newStringUtf8(lineSeparator); 249 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); 250 } 251 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 252 this.lineSeparator = new byte[lineSeparator.length]; 253 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 254 } else { 255 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 256 this.lineSeparator = null; 257 } 258 this.decodeSize = this.encodeSize - 1; 259 } 260 261 /** 262 * <p> 263 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 264 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 265 * call is not necessary when decoding, but it doesn't hurt, either. 266 * </p> 267 * <p> 268 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 269 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 270 * garbage-out philosophy: it will not check the provided data for validity. 271 * </p> 272 * 273 * @param in 274 * byte[] array of ascii data to Base32 decode. 275 * @param inPos 276 * Position to start reading data from. 277 * @param inAvail 278 * Amount of bytes available from input for encoding. 279 * @param context the context to be used 280 * 281 * Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position 282 */ 283 @Override 284 void decode(final byte[] in, int inPos, final int inAvail, final Context context) { 285 // package protected for access from I/O streams 286 287 if (context.eof) { 288 return; 289 } 290 if (inAvail < 0) { 291 context.eof = true; 292 } 293 for (int i = 0; i < inAvail; i++) { 294 final byte b = in[inPos++]; 295 if (b == PAD) { 296 // We're done. 297 context.eof = true; 298 break; 299 } else { 300 final byte[] buffer = ensureBufferSize(decodeSize, context); 301 if (b >= 0 && b < this.decodeTable.length) { 302 final int result = this.decodeTable[b]; 303 if (result >= 0) { 304 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; 305 // collect decoded bytes 306 context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result; 307 if (context.modulus == 0) { // we can output the 5 bytes 308 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS); 309 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); 310 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 311 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 312 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); 313 } 314 } 315 } 316 } 317 } 318 319 // Two forms of EOF as far as Base32 decoder is concerned: actual 320 // EOF (-1) and first time '=' character is encountered in stream. 321 // This approach makes the '=' padding characters completely optional. 322 if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do 323 final byte[] buffer = ensureBufferSize(decodeSize, context); 324 325 // we ignore partial bytes, i.e. only multiples of 8 count 326 switch (context.modulus) { 327 case 2 : // 10 bits, drop 2 and output one byte 328 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS); 329 break; 330 case 3 : // 15 bits, drop 7 and output 1 byte 331 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS); 332 break; 333 case 4 : // 20 bits = 2*8 + 4 334 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits 335 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 336 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 337 break; 338 case 5 : // 25bits = 3*8 + 1 339 context.lbitWorkArea = context.lbitWorkArea >> 1; 340 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 341 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 342 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 343 break; 344 case 6 : // 30bits = 3*8 + 6 345 context.lbitWorkArea = context.lbitWorkArea >> 6; 346 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 347 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 348 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 349 break; 350 case 7 : // 35 = 4*8 +3 351 context.lbitWorkArea = context.lbitWorkArea >> 3; 352 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); 353 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); 354 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); 355 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); 356 break; 357 default: 358 // modulus can be 0-7, and we excluded 0,1 already 359 throw new IllegalStateException("Impossible modulus "+context.modulus); 360 } 361 } 362 } 363 364 /** 365 * <p> 366 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 367 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 368 * remaining bytes (if not multiple of 5). 369 * </p> 370 * 371 * @param in 372 * byte[] array of binary data to Base32 encode. 373 * @param inPos 374 * Position to start reading data from. 375 * @param inAvail 376 * Amount of bytes available from input for encoding. 377 * @param context the context to be used 378 */ 379 @Override 380 void encode(final byte[] in, int inPos, final int inAvail, final Context context) { 381 // package protected for access from I/O streams 382 383 if (context.eof) { 384 return; 385 } 386 // inAvail < 0 is how we're informed of EOF in the underlying data we're 387 // encoding. 388 if (inAvail < 0) { 389 context.eof = true; 390 if (0 == context.modulus && lineLength == 0) { 391 return; // no leftovers to process and not using chunking 392 } 393 final byte[] buffer = ensureBufferSize(encodeSize, context); 394 final int savedPos = context.pos; 395 switch (context.modulus) { // % 5 396 case 0 : 397 break; 398 case 1 : // Only 1 octet; take top 5 bits then remainder 399 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 400 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2 401 buffer[context.pos++] = PAD; 402 buffer[context.pos++] = PAD; 403 buffer[context.pos++] = PAD; 404 buffer[context.pos++] = PAD; 405 buffer[context.pos++] = PAD; 406 buffer[context.pos++] = PAD; 407 break; 408 case 2 : // 2 octets = 16 bits to use 409 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 410 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 411 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 412 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 413 buffer[context.pos++] = PAD; 414 buffer[context.pos++] = PAD; 415 buffer[context.pos++] = PAD; 416 buffer[context.pos++] = PAD; 417 break; 418 case 3 : // 3 octets = 24 bits to use 419 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 420 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 421 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 422 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 423 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 424 buffer[context.pos++] = PAD; 425 buffer[context.pos++] = PAD; 426 buffer[context.pos++] = PAD; 427 break; 428 case 4 : // 4 octets = 32 bits to use 429 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 430 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 431 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 432 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 433 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 434 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 435 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 436 buffer[context.pos++] = PAD; 437 break; 438 default: 439 throw new IllegalStateException("Impossible modulus "+context.modulus); 440 } 441 context.currentLinePos += context.pos - savedPos; // keep track of current line position 442 // if currentPos == 0 we are at the start of a line, so don't add CRLF 443 if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required 444 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 445 context.pos += lineSeparator.length; 446 } 447 } else { 448 for (int i = 0; i < inAvail; i++) { 449 final byte[] buffer = ensureBufferSize(encodeSize, context); 450 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; 451 int b = in[inPos++]; 452 if (b < 0) { 453 b += 256; 454 } 455 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE 456 if (0 == context.modulus) { // we have enough bytes to create our output 457 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS]; 458 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS]; 459 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS]; 460 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS]; 461 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS]; 462 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS]; 463 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS]; 464 buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS]; 465 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; 466 if (lineLength > 0 && lineLength <= context.currentLinePos) { 467 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); 468 context.pos += lineSeparator.length; 469 context.currentLinePos = 0; 470 } 471 } 472 } 473 } 474 } 475 476 /** 477 * Returns whether or not the <code>octet</code> is in the Base32 alphabet. 478 * 479 * @param octet 480 * The value to test 481 * @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise. 482 */ 483 @Override 484 public boolean isInAlphabet(final byte octet) { 485 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 486 } 487}