/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;
import java.util.Objects;
import java.util.function.Supplier;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CodecPolicy;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Abstract superclass for Base-N encoders and decoders.
 *
 * <p>
 * This class is thread-safe.
 * </p>
 * <p>
 * You can set the decoding behavior when the input bytes contain leftover trailing bits that cannot be created by a
 * valid encoding. These can be bits that are unused from the final character or entire characters. The default mode is
 * lenient decoding.
 * </p>
 * <ul>
 * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible. The remainder are discarded.
 * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits are not part of a valid
 * encoding. Any unused bits from the final character must be zero. Impossible counts of entire final characters are not
 * allowed.
 * </ul>
 * <p>
 * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded to a byte array that matches
 * the original, i.e. no changes occur on the final character. This requires that the input bytes use the same padding
 * and alphabet as the encoder.
 * </p>
 */
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * Builds Base-N codec instances of type {@code T}.
     *
     * @param <T> the codec type to build.
     * @param <B> the codec builder subtype.
     * @since 1.17.0
     */
    public abstract static class AbstractBuilder<T, B extends AbstractBuilder<T, B>> implements Supplier<T> {

        private CodecPolicy decodingPolicy = DECODING_POLICY_DEFAULT;
        private int lineLength;
        private byte[] lineSeparator = CHUNK_SEPARATOR;
        private final byte[] defaultEncodeTable;
        private byte[] encodeTable;
        /** Padding byte. */
        private byte padding = PAD_DEFAULT;

        AbstractBuilder(final byte[] defaultEncodeTable) {
            this.defaultEncodeTable = defaultEncodeTable;
            this.encodeTable = defaultEncodeTable;
        }

        /**
         * Returns this instance typed as the subclass type {@code B}.
         * <p>
         * This is the same as the expression:
         * </p>
         * <pre>
         * (B) this
         * </pre>
         *
         * @return this instance typed as the subclass type {@code B}.
         */
        @SuppressWarnings("unchecked")
        B asThis() {
            return (B) this;
        }

        CodecPolicy getDecodingPolicy() {
            return decodingPolicy;
        }

        byte[] getEncodeTable() {
            return encodeTable;
        }

        int getLineLength() {
            return lineLength;
        }

        byte[] getLineSeparator() {
            return lineSeparator;
        }

        byte getPadding() {
            return padding;
        }

        /**
         * Sets the decoding policy.
         *
         * @param decodingPolicy the decoding policy, null resets to the default.
         * @return {@code this} instance.
         */
        public B setDecodingPolicy(final CodecPolicy decodingPolicy) {
            this.decodingPolicy = decodingPolicy != null ? decodingPolicy : DECODING_POLICY_DEFAULT;
            return asThis();
        }

        /**
         * Sets the encode table.
         *
         * @param encodeTable the encode table, null resets to the default.
         * @return {@code this} instance.
         */
        public B setEncodeTable(final byte... encodeTable) {
            // Defensive copy so later changes to the caller's array do not affect this builder.
            this.encodeTable = encodeTable != null ? encodeTable.clone() : defaultEncodeTable;
            return asThis();
        }

        /**
         * Sets the line length.
         *
         * @param lineLength the line length, less than 0 resets to the default.
         * @return {@code this} instance.
         */
        public B setLineLength(final int lineLength) {
            this.lineLength = Math.max(0, lineLength);
            return asThis();
        }

        /**
         * Sets the line separator.
         *
         * @param lineSeparator the line separator, null resets to the default.
         * @return {@code this} instance.
         */
        public B setLineSeparator(final byte... lineSeparator) {
            // Defensive copy so later changes to the caller's array do not affect this builder.
            this.lineSeparator = lineSeparator != null ? lineSeparator.clone() : CHUNK_SEPARATOR;
            return asThis();
        }

        /**
         * Sets the padding byte.
         *
         * @param padding the padding byte.
         * @return {@code this} instance.
         */
        public B setPadding(final byte padding) {
            this.padding = padding;
            return asThis();
        }

    }

    /**
     * Holds thread context so classes can be thread-safe.
     *
     * This class is not itself thread-safe; each thread must allocate its own copy.
     */
    static class Context {

        /**
         * Placeholder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        int ibitWorkArea;

        /**
         * Placeholder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        long lbitWorkArea;

        /**
         * Buffer for streaming.
         */
        byte[] buffer;

        /**
         * Position where next character should be written in the buffer.
         */
        int pos;

        /**
         * Position where next character should be read from the buffer.
         */
        int readPos;

        /**
         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
         * and must be thrown away.
         */
        boolean eof;

        /**
         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
         * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
         */
        int currentLinePos;

        /**
         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
         * variable helps track that.
         */
        int modulus;

        /**
         * Returns a String useful for debugging (especially within a debugger.)
         *
         * @return a String useful for debugging.
         */
        @Override
        public String toString() {
            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
        }
    }

    /**
     * End-of-file marker.
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     * MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="https://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * The maximum size buffer to allocate.
     *
     * <p>This is set to the same size used in the JDK {@link java.util.ArrayList}:</p>
     * <blockquote>
     * Some VMs reserve some header words in an array.
     * Attempts to allocate larger arrays may result in
     * OutOfMemoryError: Requested array size exceeds VM limit.
     * </blockquote>
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * The default decoding policy.
     *
     * @since 1.15
     */
    protected static final CodecPolicy DECODING_POLICY_DEFAULT = CodecPolicy.LENIENT;

    /**
     * Chunk separator per RFC 2045 section 2.1.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};

    /**
     * Create a positive capacity at least as large the minimum required capacity.
     * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
     * can be allocated.
     *
     * @param minCapacity the minimum capacity
     * @return the capacity
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static int createPositiveCapacity(final int minCapacity) {
        if (minCapacity < 0) {
            // overflow
            throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
        }
        // This is called when we require buffer expansion to a very big array.
        // Use the conservative maximum buffer size if possible, otherwise the biggest required.
        //
        // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
        // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
        // Integer.MAX_VALUE length array.
        // The result is that we may have to allocate an array of this size more than once if
        // the capacity must be expanded again.
        return Math.max(minCapacity, MAX_BUFFER_SIZE);
    }

    /**
     * Gets a copy of the chunk separator per RFC 2045 section 2.1.
     *
     * @return the chunk separator
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     * @since 1.15
     */
    public static byte[] getChunkSeparator() {
        return CHUNK_SEPARATOR.clone();
    }

    /**
     * Checks if a byte value is whitespace or not.
     *
     * @param byteToCheck the byte to check
     * @return true if byte is whitespace, false otherwise
     * @see Character#isWhitespace(int)
     * @deprecated Use {@link Character#isWhitespace(int)}.
     */
    @Deprecated
    protected static boolean isWhiteSpace(final byte byteToCheck) {
        return Character.isWhitespace(byteToCheck);
    }

    /**
     * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
     *
     * @param context the context to be used
     * @param minCapacity the minimum required capacity
     * @return the resized byte[] buffer
     * @throws OutOfMemoryError if the {@code minCapacity} is negative
     */
    private static byte[] resizeBuffer(final Context context, final int minCapacity) {
        // Overflow-conscious code treats the min and new capacity as unsigned.
        final int oldCapacity = context.buffer.length;
        int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
        if (Integer.compareUnsigned(newCapacity, minCapacity) < 0) {
            newCapacity = minCapacity;
        }
        if (Integer.compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
            newCapacity = createPositiveCapacity(minCapacity);
        }
        final byte[] b = Arrays.copyOf(context.buffer, newCapacity);
        context.buffer = b;
        return b;
    }

    /**
     * Gets the array length or 0 if null.
     *
     * @param array the array or null.
     * @return the array length or 0 if null.
     */
    static int toLength(final byte[] array) {
        return array == null ? 0 : array.length;
    }

    /**
     * Deprecated: Will be removed in 2.0.
     * <p>
     * Instance variable just in case it needs to vary later
     * </p>
     *
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT;

    /** Pad byte. Instance variable just in case it needs to vary later. */
    protected final byte pad;

    /** Number of bytes in each full block of unencoded data, for example 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, for example 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to the nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} > 0.
     */
    private final int chunkSeparatorLength;

    /**
     * Defines the decoding behavior when the input bytes contain leftover trailing bits that
     * cannot be created by a valid encoding. These can be bits that are unused from the final
     * character or entire characters. The default mode is lenient decoding. Use
     * {@link CodecPolicy#STRICT} to enable strict decoding.
     * <ul>
     * <li>Lenient: Any trailing bits are composed into 8-bit bytes where possible.
     *     The remainder are discarded.
     * <li>Strict: The decoding will raise an {@link IllegalArgumentException} if trailing bits
     *     are not part of a valid encoding. Any unused bits from the final character must
     *     be zero. Impossible counts of entire final characters are not allowed.
     * </ul>
     * <p>
     * When strict decoding is enabled it is expected that the decoded bytes will be re-encoded
     * to a byte array that matches the original, i.e. no changes occur on the final
     * character. This requires that the input bytes use the same padding and alphabet
     * as the encoder.
     * </p>
     */
    private final CodecPolicy decodingPolicy;

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (for example Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (for example Base64 = 4)
     * @param lineLength if > 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (for example Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (for example Base64 = 4)
     * @param lineLength if > 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, pad, DECODING_POLICY_DEFAULT);
    }

    /**
     * Constructs a new instance.
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of the encoded block size.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (for example Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (for example Base64 = 4)
     * @param lineLength if > 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     * @param decodingPolicy Decoding policy.
     * @throws NullPointerException if {@code decodingPolicy} is null
     * @since 1.15
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, final int lineLength, final int chunkSeparatorLength, final byte pad,
            final CodecPolicy decodingPolicy) {
        this.unencodedBlockSize = unencodedBlockSize;
        this.encodedBlockSize = encodedBlockSize;
        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
        // Integer division then multiplication rounds lineLength down to a whole number of encoded blocks.
        this.lineLength = useChunking ? lineLength / encodedBlockSize * encodedBlockSize : 0;
        this.chunkSeparatorLength = chunkSeparatorLength;
        this.pad = pad;
        this.decodingPolicy = Objects.requireNonNull(decodingPolicy, "codecPolicy");
    }

    /**
     * Returns the amount of buffered data available for reading.
     *
     * @param context the context to be used
     * @return The amount of buffered data available for reading.
     */
    int available(final Context context) { // package protected for access from I/O streams
        return hasData(context) ? context.pos - context.readPos : 0;
    }

    /**
     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
     *
     * Intended for use in checking line-ending arrays
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
     */
    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
        if (arrayOctet != null) {
            for (final byte element : arrayOctet) {
                if (pad == element || isInAlphabet(element)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Decodes a byte[] containing characters in the Base-N alphabet.
     *
     * @param array
     *            A byte array containing Base-N character data
     * @return a byte array containing binary data
     */
    @Override
    public byte[] decode(final byte[] array) {
        if (BinaryCodec.isEmpty(array)) {
            return array;
        }
        final Context context = new Context();
        decode(array, 0, array.length, context);
        decode(array, 0, EOF, context); // Notify decoder of EOF.
        final byte[] result = new byte[context.pos];
        readResults(result, 0, result.length, context);
        return result;
    }

    // package protected for access from I/O streams
    abstract void decode(byte[] array, int i, int length, Context context);

    /**
     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
     *
     * @param obj
     *            Object to decode
     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
     *         supplied.
     * @throws DecoderException
     *             if the parameter supplied is not of type byte[] or String
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        if (obj instanceof String) {
            return decode((String) obj);
        }
        throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
    }

    /**
     * Decodes a String containing characters in the Base-N alphabet.
     *
     * @param array
     *            A String containing Base-N character data
     * @return a byte array containing binary data
     */
    public byte[] decode(final String array) {
        return decode(StringUtils.getBytesUtf8(array));
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
     *
     * @param array
     *            a byte array containing binary data
     * @return A byte array containing only the base N alphabetic character data
     */
    @Override
    public byte[] encode(final byte[] array) {
        if (BinaryCodec.isEmpty(array)) {
            return array;
        }
        return encode(array, 0, array.length);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the alphabet.
     *
     * @param array
     *            a byte array containing binary data
     * @param offset
     *            initial offset of the subarray.
     * @param length
     *            length of the subarray.
     * @return A byte array containing only the base N alphabetic character data
     * @since 1.11
     */
    public byte[] encode(final byte[] array, final int offset, final int length) {
        if (BinaryCodec.isEmpty(array)) {
            return array;
        }
        final Context context = new Context();
        encode(array, offset, length, context);
        encode(array, offset, EOF, context); // Notify encoder of EOF.
        final byte[] buf = new byte[context.pos - context.readPos];
        readResults(buf, 0, buf.length, context);
        return buf;
    }

    // package protected for access from I/O streams
    abstract void encode(byte[] array, int i, int length, Context context);

    /**
     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
     *
     * @param obj
     *            Object to encode
     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type byte[]
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof byte[])) {
            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
        }
        return encode((byte[]) obj);
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
     * Uses UTF8 encoding.
     * <p>
     * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
     * </p>
     *
     * @param array a byte array containing binary data
     * @return String containing only character data in the appropriate alphabet.
     * @since 1.5
     */
    public String encodeAsString(final byte[] array) {
        return StringUtils.newStringUtf8(encode(array));
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
     * Uses UTF8 encoding.
     *
     * @param array
     *            a byte array containing binary data
     * @return A String containing only Base-N character data
     */
    public String encodeToString(final byte[] array) {
        return StringUtils.newStringUtf8(encode(array));
    }

    /**
     * Ensures that the buffer has room for {@code size} bytes
     *
     * @param size minimum spare space required
     * @param context the context to be used
     * @return the buffer
     */
    protected byte[] ensureBufferSize(final int size, final Context context) {
        if (context.buffer == null) {
            context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
            context.pos = 0;
            context.readPos = 0;
            // Overflow-conscious:
            // x + y > z == x + y - z > 0
        } else if (context.pos + size - context.buffer.length > 0) {
            return resizeBuffer(context, context.pos + size);
        }
        return context.buffer;
    }

    /**
     * Gets the decoding behavior policy.
     *
     * <p>
     * The default is lenient. If the decoding policy is strict, then decoding will raise an
     * {@link IllegalArgumentException} if trailing bits are not part of a valid encoding. Lenient decoding will
     * compose trailing bits into 8-bit bytes and discard the remainder.
     * </p>
     *
     * @return the decoding policy, never {@code null}
     * @since 1.15
     */
    public CodecPolicy getCodecPolicy() {
        return decodingPolicy;
    }

    /**
     * Gets the default buffer size. Can be overridden.
     *
     * @return the default buffer size.
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }

    /**
     * Gets the amount of space needed to encode the supplied array.
     *
     * @param array byte[] array which will later be encoded
     * @return amount of space needed to encode the supplied array.
     *         Returns a long since a max-len array will require > Integer.MAX_VALUE
     * @throws NullPointerException if {@code array} is null
     */
    public long getEncodedLength(final byte[] array) {
        // Calculate non-chunked size - rounded up to allow for padding.
        // The whole expression is evaluated as a long so that neither the round-up addition
        // nor the multiplication can overflow an int.
        long len = ((long) array.length + unencodedBlockSize - 1) / unencodedBlockSize * encodedBlockSize;
        if (lineLength > 0) { // We're using chunking
            // Round up to nearest multiple
            len += (len + lineLength - 1) / lineLength * chunkSeparatorLength;
        }
        return len;
    }

    /**
     * Tests whether this object has buffered data for reading.
     *
     * @param context the context to be used
     * @return true if there is data still available for reading.
     */
    boolean hasData(final Context context) { // package protected for access from I/O streams
        return context.pos > context.readPos;
    }

    /**
     * Tests whether or not the {@code octet} is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

    /**
     * Tests a given byte array to see if it contains only valid characters within the alphabet.
     * The method optionally treats whitespace and pad as valid.
     *
     * @param arrayOctet byte array to test
     * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
     * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     */
    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
        for (final byte octet : arrayOctet) {
            // Reject a byte outside the alphabet unless whitespace/pad are permitted and it is one of those.
            if (!isInAlphabet(octet) && (!allowWSPad || octet != pad && !Character.isWhitespace(octet))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a given String to see if it contains only valid characters within the alphabet.
     * The method treats whitespace and PAD as valid.
     *
     * @param basen String to test
     * @return {@code true} if all characters in the String are valid characters in the alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @see #isInAlphabet(byte[], boolean)
     */
    public boolean isInAlphabet(final String basen) {
        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
    }

    /**
     * Tests true if decoding behavior is strict. Decoding will raise an {@link IllegalArgumentException} if trailing
     * bits are not part of a valid encoding.
     *
     * <p>
     * The default is false for lenient decoding. Decoding will compose trailing bits into 8-bit bytes and discard the
     * remainder.
     * </p>
     *
     * @return true if using strict decoding
     * @since 1.15
     */
    public boolean isStrictDecoding() {
        return decodingPolicy == CodecPolicy.STRICT;
    }

    /**
     * Reads buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
     * bytes. Returns how many bytes were actually extracted.
     * <p>
     * Package private for access from I/O streams.
     * </p>
     *
     * @param b
     *            byte[] array to extract the buffered data into.
     * @param bPos
     *            position in byte[] array to start extraction at.
     * @param bAvail
     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
     * @param context
     *            the context to be used
     * @return The number of bytes successfully extracted into the provided byte[] array.
     */
    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
        if (hasData(context)) {
            final int len = Math.min(available(context), bAvail);
            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
            context.readPos += len;
            if (!hasData(context)) {
                // All data read.
                // Reset position markers but do not set buffer to null to allow its reuse.
                // hasData(context) will still return false, and this method will return 0 until
                // more data is available, or -1 if EOF.
                context.pos = context.readPos = 0;
            }
            return len;
        }
        return context.eof ? EOF : 0;
    }
}