/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;

import org.apache.commons.codec.CodecPolicy;

/**
 * Provides Base32 encoding and decoding as defined by <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>.
 *
 * <p>
 * The class can be parameterized in the following manner with various constructors:
 * </p>
 * <ul>
 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li>
 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.</li>
 * <li>Line separator: Default is CRLF ("\r\n")</li>
 * </ul>
 * <p>
 * This class operates directly on byte streams, and not character streams.
 * </p>
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * To configure a new instance, use a {@link Builder}. For example:
 * </p>
 * <pre>
 * Base32 base32 = Base32.builder()
 *   .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient
 *   .setLineLength(0)                          // default is none
 *   .setLineSeparator('\r', '\n')              // default is CR LF
 *   .setPadding('=')                           // default is '='
 *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
 *   .get();
 * </pre>
 *
 * @see Base32InputStream
 * @see Base32OutputStream
 * @see <a href="https://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
 * @since 1.5
 */
public class Base32 extends BaseNCodec {

    /**
     * Builds {@link Base32} instances.
     *
     * <p>
     * To configure a new instance, use a {@link Builder}. For example:
     * </p>
     *
     * <pre>
     * Base32 base32 = Base32.builder()
     *   .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient
     *   .setLineLength(0)                          // default is none
     *   .setLineSeparator('\r', '\n')              // default is CR LF
     *   .setPadding('=')                           // default is '='
     *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
     *   .get();
     * </pre>
     *
     * @since 1.17.0
     */
    public static class Builder extends AbstractBuilder<Base32, Builder> {

        /**
         * Constructs a new instance using <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
         * Alphabet</a>.
         */
        public Builder() {
            super(ENCODE_TABLE);
            setDecodeTableRaw(DECODE_TABLE);
            setEncodeTableRaw(ENCODE_TABLE);
            setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
            setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
        }

        @Override
        public Base32 get() {
            return new Base32(this);
        }

        /**
         * Sets the encode table and selects the matching decode table.
         * <p>
         * The Base32 hexadecimal decode table is selected when {@code encodeTable} has the same content as the Base32 hexadecimal encode table; for any
         * other argument the standard Base32 decode table is selected.
         * </p>
         *
         * @param encodeTable the encode table.
         * @return {@code this} instance.
         */
        @Override
        public Builder setEncodeTable(final byte... encodeTable) {
            super.setDecodeTableRaw(Arrays.equals(encodeTable, HEX_ENCODE_TABLE) ? HEX_DECODE_TABLE : DECODE_TABLE);
            return super.setEncodeTable(encodeTable);
        }

        /**
         * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
         * <p>
         * This overrides the decode table previously selected by {@link #setEncodeTable(byte...)}. Only the decode table is changed by this method; the
         * encode table is left untouched.
         * </p>
         *
         * @param useHex use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
         * @return {@code this} instance.
         * @since 1.18.0
         */
        public Builder setHexDecodeTable(final boolean useHex) {
            // Install the decode table directly. Routing it through setEncodeTable(...) would store a decode lookup
            // table as the encode table and reset the decode table to the non-hex alphabet.
            setDecodeTableRaw(decodeTable(useHex));
            return this;
        }

        /**
         * Sets the encode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet.
         * <p>
         * This overrides a value previously set with {@link #setEncodeTable(byte...)}.
         * </p>
         *
         * @param useHex
         *               <ul>
         *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding
         *               with Extended Hex Alphabet</a></li>
         *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
         *               Alphabet</a></li>
         *               </ul>
         * @return {@code this} instance.
         * @since 1.18.0
         */
        public Builder setHexEncodeTable(final boolean useHex) {
            return setEncodeTable(encodeTable(useHex));
        }
    }

    /**
     * BASE32 characters are 5 bits in length. They are formed by taking a block of five octets to form a 40-bit string, which is converted into eight BASE32
     * characters.
     */
    private static final int BITS_PER_ENCODED_BYTE = 5;

    private static final int BYTES_PER_ENCODED_BLOCK = 8;
    private static final int BYTES_PER_UNENCODED_BLOCK = 5;

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit
     * positive integer equivalents. Characters that are not in the Base32 alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
            -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40-4f A-O
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 50-5a P-Z
                                                        -1, -1, -1, -1, -1, // 5b-5f
            -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 60-6f a-o
            15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,                     // 70-7a p-z
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" equivalents as specified in
     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>.
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32 Alphabet</a>
     */
    // @formatter:off
    private static final byte[] ENCODE_TABLE = {
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
            '2', '3', '4', '5', '6', '7',
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as specified in Table 4 of RFC 4648) into their
     * 5-bit positive integer equivalents. Characters that are not in the Base32 Hex alphabet but fall within the bounds of the array are translated to -1.
     */
    // @formatter:off
    private static final byte[] HEX_DECODE_TABLE = {
        //   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
            25, 26, 27, 28, 29, 30, 31,                                     // 50-56 P-V
                                        -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f
            -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f a-o
            25, 26, 27, 28, 29, 30, 31                                      // 70-76 p-v
    };
    // @formatter:on

    /**
     * This array is a lookup table that translates 5-bit positive integer index values into their "Base 32 Encoding with Extended Hex Alphabet" equivalents as
     * specified in <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex
     * Alphabet</a>.
     *
     * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with Extended Hex Alphabet</a>
     */
    // @formatter:off
    private static final byte[] HEX_ENCODE_TABLE = {
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
    };
    // @formatter:on

    /** Mask used to extract 5 bits, used when encoding Base32 bytes */
    private static final int MASK_5_BITS = 0x1f;

    /** Mask used to extract 4 bits, used when decoding final trailing character. */
    private static final long MASK_4_BITS = 0x0fL;

    /** Mask used to extract 3 bits, used when decoding final trailing character. */
    private static final long MASK_3_BITS = 0x07L;

    /** Mask used to extract 2 bits, used when decoding final trailing character. */
    private static final long MASK_2_BITS = 0x03L;

    /** Mask used to extract 1 bit, used when decoding final trailing character. */
    private static final long MASK_1_BITS = 0x01L;

    // The static final fields above are used for the original static byte[] methods on Base32.
    // The private member fields below are used with the new streaming approach, which requires
    // some state be preserved between calls of encode() and decode().

    /**
     * Creates a new Builder.
     *
     * <p>
     * To configure a new instance, use a {@link Builder}. For example:
     * </p>
     *
     * <pre>
     * Base32 base32 = Base32.builder()
     *   .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient
     *   .setLineLength(0)                          // default is none
     *   .setLineSeparator('\r', '\n')              // default is CR LF
     *   .setPadding('=')                           // default is '='
     *   .setEncodeTable(customEncodeTable)         // default is RFC 4648 Section 6, Table 3: The Base 32 Alphabet
     *   .get();
     * </pre>
     *
     * @return a new Builder.
     * @since 1.17.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Gets the decoding table that matches {@code useHex}.
     *
     * @param useHex if true, select the Base32 hexadecimal decode table, otherwise the standard Base32 decode table.
     * @return the decoding table that matches {@code useHex}.
     */
    private static byte[] decodeTable(final boolean useHex) {
        return useHex ? HEX_DECODE_TABLE : DECODE_TABLE;
    }

    /**
     * Gets the encoding table that matches {@code useHex}.
     *
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @return the encoding table that matches {@code useHex}.
     */
    private static byte[] encodeTable(final boolean useHex) {
        return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE;
    }

    /**
     * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link
     * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;}
     */
    private final int encodeSize;

    /**
     * Line separator for encoding. Not used when decoding. Only used if lineLength &gt; 0.
     */
    private final byte[] lineSeparator;

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     */
    public Base32() {
        this(false);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param useHex
     *               <ul>
     *               <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *               Extended Hex Alphabet</a></li>
     *               <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *               Alphabet</a></li>
     *               </ul>
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final boolean useHex) {
        this(0, null, useHex, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param useHex
     *                <ul>
     *                <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32 Encoding with
     *                Extended Hex Alphabet</a></li>
     *                <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *                Alphabet</a></li>
     *                </ul>
     * @param padding byte used as padding byte.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final boolean useHex, final byte padding) {
        this(0, null, useHex, padding);
    }

    /**
     * Constructs a Base32 codec from the settings held by a {@link Builder}.
     *
     * @param builder the configured builder.
     * @throws IllegalArgumentException Thrown when chunking is enabled and the line separator contains Base32 or padding characters, or when the padding
     *                                  byte is in the alphabet or is whitespace.
     */
    private Base32(final Builder builder) {
        super(builder);
        if (builder.getLineLength() > 0) {
            final byte[] lineSeparator = builder.getLineSeparator();
            // Must be done after initializing the tables
            if (containsAlphabetOrPad(lineSeparator)) {
                final String sep = StringUtils.newStringUtf8(lineSeparator);
                throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]");
            }
            this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
            this.lineSeparator = lineSeparator;
        } else {
            this.encodeSize = BYTES_PER_ENCODED_BLOCK;
            this.lineSeparator = null;
        }
        if (isInAlphabet(builder.getPadding()) || Character.isWhitespace(builder.getPadding())) {
            throw new IllegalArgumentException("pad must not be in alphabet or whitespace");
        }
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is 0 (no chunking).
     * </p>
     *
     * @param pad byte used as padding byte.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final byte pad) {
        this(false, pad);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length is given in the constructor, the line separator is CRLF.
     * </p>
     *
     * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0, then
     *                   the output will not be divided into lines (chunks). Ignored when decoding.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength) {
        this(lineLength, CHUNK_SEPARATOR);
    }

    /**
     * Constructs a Base32 codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator) {
        this(lineLength, lineSeparator, false, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @param useHex
     *                      <ul>
     *                      <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32
     *                      Encoding with Extended Hex Alphabet</a></li>
     *                      <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *                      Alphabet</a></li>
     *                      </ul>
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) {
        this(lineLength, lineSeparator, useHex, PAD_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                      then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
     * @param useHex
     *                      <ul>
     *                      <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32
     *                      Encoding with Extended Hex Alphabet</a></li>
     *                      <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *                      Alphabet</a></li>
     *                      </ul>
     * @param padding       padding byte.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding) {
        this(lineLength, lineSeparator, useHex, padding, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a Base32 / Base32 Hex codec used for decoding and encoding.
     * <p>
     * When encoding the line length and line separator are given in the constructor.
     * </p>
     * <p>
     * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data.
     * </p>
     *
     * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 8). If lineLength &lt;= 0,
     *                       then the output will not be divided into lines (chunks). Ignored when decoding.
     * @param lineSeparator  Each line of encoded data will end with this sequence of bytes.
     * @param useHex
     *                       <ul>
     *                       <li>If true, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-7">RFC 4648 Section 7, Table 4: Base 32
     *                       Encoding with Extended Hex Alphabet</a></li>
     *                       <li>If false, then use <a href="https://datatracker.ietf.org/doc/html/rfc4648#section-6">RFC 4648 Section 6, Table 3: The Base 32
     *                       Alphabet</a></li>
     *                       </ul>
     * @param padding        padding byte.
     * @param decodingPolicy The decoding policy.
     * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base32 characters. Or the lineLength &gt; 0 and lineSeparator is null.
     * @since 1.15
     * @deprecated Use {@link #builder()} and {@link Builder}.
     */
    @Deprecated
    public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte padding, final CodecPolicy decodingPolicy) {
        // @formatter:off
        this(builder()
                .setLineLength(lineLength)
                .setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY)
                .setDecodeTable(decodeTable(useHex))
                .setEncodeTableRaw(encodeTable(useHex))
                .setPadding(padding)
                .setDecodingPolicy(decodingPolicy));
        // @formatter:on
    }

    /**
     * <p>
     * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
     * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is not necessary when decoding, but it doesn't hurt, either.
     * </p>
     * <p>
     * Ignores all non-Base32 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has
     * implications for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity.
     * </p>
     * <p>
     * Output is written to {@link org.apache.commons.codec.binary.BaseNCodec.Context#buffer Context#buffer} as 8-bit octets, using
     * {@link org.apache.commons.codec.binary.BaseNCodec.Context#pos Context#pos} as the buffer position
     * </p>
     *
     * @param input   byte[] array of ASCII data to Base32 decode.
     * @param inPos   Position to start reading data from.
     * @param inAvail Amount of bytes available from input for decoding.
     * @param context the context to be used
     */
    @Override
    void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
        // package protected for access from I/O streams
        if (context.eof) {
            return;
        }
        if (inAvail < 0) {
            context.eof = true;
        }
        final int decodeSize = this.encodeSize - 1;
        for (int i = 0; i < inAvail; i++) {
            final byte b = input[inPos++];
            if (b == pad) {
                // We're done.
                context.eof = true;
                break;
            }
            final byte[] buffer = ensureBufferSize(decodeSize, context);
            if (b >= 0 && b < this.decodeTable.length) {
                final int result = this.decodeTable[b];
                if (result >= 0) {
                    context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
                    // collect decoded bytes
                    context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result;
                    if (context.modulus == 0) { // we can output the 5 bytes
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 32 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                        buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    }
                }
            }
        }
        // Two forms of EOF as far as Base32 decoder is concerned: actual
        // EOF (-1) and first time '=' character is encountered in stream.
        // This approach makes the '=' padding characters completely optional.
        if (context.eof && context.modulus > 0) { // if modulus == 0, nothing to do
            final byte[] buffer = ensureBufferSize(decodeSize, context);
            // We ignore partial bytes, i.e. only multiples of 8 count.
            // Any combination not part of a valid encoding is either partially decoded
            // or will raise an exception. Possible trailing characters are 2, 4, 5, 7.
            // It is not possible to encode with 1, 3, 6 trailing characters.
            // For backwards compatibility 3 & 6 chars are decoded anyway rather than discarded.
            // See the encode(byte[]) method EOF section.
            switch (context.modulus) {
//              case 0 : // impossible, as excluded above
                case 1: // 5 bits - raise an exception when strict
                    validateTrailingCharacters();
                    // falls-through: when not strict, the single 5-bit character is handled by the case below
                case 2: // 10 bits, drop 2 and output one byte
                    validateCharacter(MASK_2_BITS, context);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 2 & MASK_8BITS);
                    break;
                case 3: // 15 bits, drop 7 and output 1 byte, or raise an exception
                    validateTrailingCharacters();
                    // Not possible from a valid encoding but decode anyway
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 7 & MASK_8BITS);
                    break;
                case 4: // 20 bits = 2*8 + 4
                    validateCharacter(MASK_4_BITS, context);
                    context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    break;
                case 5: // 25 bits = 3*8 + 1
                    validateCharacter(MASK_1_BITS, context);
                    context.lbitWorkArea = context.lbitWorkArea >> 1;
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    break;
                case 6: // 30 bits = 3*8 + 6, or raise an exception
                    validateTrailingCharacters();
                    // Not possible from a valid encoding but decode anyway
                    context.lbitWorkArea = context.lbitWorkArea >> 6;
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    break;
                case 7: // 35 bits = 4*8 +3
                    validateCharacter(MASK_3_BITS, context);
                    context.lbitWorkArea = context.lbitWorkArea >> 3;
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 24 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 16 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea >> 8 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS);
                    break;
                default:
                    // modulus can be 0-7, and 0 was excluded by the enclosing condition
                    throw new IllegalStateException("Impossible modulus " + context.modulus);
            }
        }
    }

    /**
     * <p>
     * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
     * inAvail set to "-1" to alert encoder that EOF has been reached, so flush last remaining bytes (if not multiple of 5).
     * </p>
     *
     * @param input   byte[] array of binary data to Base32 encode.
     * @param inPos   Position to start reading data from.
     * @param inAvail Amount of bytes available from input for encoding.
     * @param context the context to be used
     */
    @Override
    void encode(final byte[] input, int inPos, final int inAvail, final Context context) {
        // package protected for access from I/O streams
        if (context.eof) {
            return;
        }
        // inAvail < 0 is how we're informed of EOF in the underlying data we're
        // encoding.
        if (inAvail < 0) {
            context.eof = true;
            if (0 == context.modulus && lineLength == 0) {
                return; // no leftovers to process and not using chunking
            }
            final byte[] buffer = ensureBufferSize(encodeSize, context);
            final int savedPos = context.pos;
            switch (context.modulus) { // % 5
                case 0:
                    break;
                case 1: // Only 1 octet; take top 5 bits then remainder
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 3) & MASK_5_BITS]; // 8-1*5 = 3
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 2) & MASK_5_BITS]; // 5-3=2
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    break;
                case 2: // 2 octets = 16 bits to use
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 11) & MASK_5_BITS]; // 16-1*5 = 11
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 6) & MASK_5_BITS]; // 16-2*5 = 6
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 1) & MASK_5_BITS]; // 16-3*5 = 1
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 4) & MASK_5_BITS]; // 5-1 = 4
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    break;
                case 3: // 3 octets = 24 bits to use
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 19) & MASK_5_BITS]; // 24-1*5 = 19
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 14) & MASK_5_BITS]; // 24-2*5 = 14
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 9) & MASK_5_BITS]; // 24-3*5 = 9
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 4) & MASK_5_BITS]; // 24-4*5 = 4
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 1) & MASK_5_BITS]; // 5-4 = 1
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                    break;
                case 4: // 4 octets = 32 bits to use
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 27) & MASK_5_BITS]; // 32-1*5 = 27
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 22) & MASK_5_BITS]; // 32-2*5 = 22
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 17) & MASK_5_BITS]; // 32-3*5 = 17
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 12) & MASK_5_BITS]; // 32-4*5 = 12
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 7) & MASK_5_BITS]; // 32-5*5 = 7
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 2) & MASK_5_BITS]; // 32-6*5 = 2
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea << 3) & MASK_5_BITS]; // 5-2 = 3
                    buffer[context.pos++] = pad;
                    break;
                default:
                    throw new IllegalStateException("Impossible modulus " + context.modulus);
            }
            context.currentLinePos += context.pos - savedPos; // keep track of current line position
            // if currentPos == 0 we are at the start of a line, so don't add CRLF
            if (lineLength > 0 && context.currentLinePos > 0) { // add chunk separator if required
                System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                context.pos += lineSeparator.length;
            }
        } else {
            for (int i = 0; i < inAvail; i++) {
                final byte[] buffer = ensureBufferSize(encodeSize, context);
                context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
                int b = input[inPos++];
                if (b < 0) {
                    b += 256;
                }
                context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
                if (0 == context.modulus) { // we have enough bytes to create our output
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 35) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 30) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 25) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 20) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 15) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 10) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) (context.lbitWorkArea >> 5) & MASK_5_BITS];
                    buffer[context.pos++] = encodeTable[(int) context.lbitWorkArea & MASK_5_BITS];
                    context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
                    if (lineLength > 0 && lineLength <= context.currentLinePos) {
                        System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                        context.pos += lineSeparator.length;
                        context.currentLinePos = 0;
                    }
                }
            }
        }
    }

    /**
     * Gets the line separator (for testing only).
     *
     * @return the line separator.
     */
    byte[] getLineSeparator() {
        return lineSeparator;
    }

    /**
     * Returns whether or not the {@code octet} is in the Base32 alphabet.
     *
     * @param octet The value to test
     * @return {@code true} if the value is defined in the Base32 alphabet {@code false} otherwise.
     */
    @Override
    public boolean isInAlphabet(final byte octet) {
        return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
    }

    /**
     * Validates whether decoding the final trailing character is possible in the context of the set of possible base 32 values.
     * <p>
     * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-32 digit is zero in the bits
     * that will be discarded.
     * </p>
     *
     * @param emptyBitsMask The mask of the lower bits that should be empty
     * @param context       the context to be used
     * @throws IllegalArgumentException if the bits being checked contain any non-zero value
     */
    private void validateCharacter(final long emptyBitsMask, final Context context) {
        // Use the long bit work area
        if (isStrictDecoding() && (context.lbitWorkArea & emptyBitsMask) != 0) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
                    "base 32 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
        }
    }

    /**
     * Validates whether decoding allows final trailing characters that cannot be created during encoding.
     *
     * @throws IllegalArgumentException if strict decoding is enabled
     */
    private void validateTrailingCharacters() {
        if (isStrictDecoding()) {
            throw new IllegalArgumentException("Strict decoding: Last encoded character(s) (before the paddings if any) are valid " +
                    "base 32 alphabet but not a possible encoding. Decoding requires either 2, 4, 5, or 7 trailing 5-bit characters to create bytes.");
        }
    }
}