/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Abstract superclass for Base-N encoders and decoders.
 *
 * <p>
 * This class is thread-safe.
 * </p>
 *
 * <p>
 * All mutable working state lives in a {@link Context} object that is created per operation (or owned by the
 * calling stream) and passed to the abstract {@code encode}/{@code decode} methods; the codec instance itself only
 * holds {@code final} configuration.
 * </p>
 */
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * Holds thread context so classes can be thread-safe.
     *
     * This class is not itself thread-safe; each thread must allocate its own copy.
     *
     * @since 1.7
     */
    static class Context {

        /**
         * Place holder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        int ibitWorkArea;

        /**
         * Place holder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        long lbitWorkArea;

        /**
         * Buffer for streaming. {@code null} means there is no buffered data.
         */
        byte[] buffer;

        /**
         * Position where next character should be written in the buffer.
         */
        int pos;

        /**
         * Position where next character should be read from the buffer.
         */
        int readPos;

        /**
         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
         * and must be thrown away.
         */
        boolean eof;

        /**
         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
         * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
         */
        int currentLinePos;

        /**
         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
         * variable helps track that.
         */
        int modulus;

        Context() {
        }

        /**
         * Returns a String useful for debugging (especially within a debugger.)
         *
         * @return a String useful for debugging.
         */
        @SuppressWarnings("boxing") // OK to ignore boxing here
        @Override
        public String toString() {
            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
        }
    }

    /**
     * EOF
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     * MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which the buffer grows when it is too small (unless a larger size is explicitly required). */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * Soft upper bound for the buffer size. Doubling never grows the buffer past this value; it is only exceeded
     * when the caller explicitly needs more space than this.
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Byte used to pad output for this codec instance. */
    protected final byte pad; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;

    /**
     * Creates a codec using {@link #PAD_DEFAULT} as the padding byte.
     *
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of {@link #encodedBlockSize}.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }

    /**
     * Creates a codec with an explicit padding byte.
     *
     * <p>
     * Note {@code lineLength} is rounded down to the nearest multiple of {@link #encodedBlockSize}.
     * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength, final byte pad) {
        this.unencodedBlockSize = unencodedBlockSize;
        this.encodedBlockSize = encodedBlockSize;
        // Chunking needs both a positive line length and a non-empty separator.
        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
        this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
        this.chunkSeparatorLength = chunkSeparatorLength;

        this.pad = pad;
    }

    /**
     * Returns true if this object has buffered data for reading.
     *
     * @param context the context to be used
     * @return true if there is data still available for reading.
     */
    boolean hasData(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null;
    }

    /**
     * Returns the amount of buffered data available for reading.
     *
     * @param context the context to be used
     * @return The amount of buffered data available for reading.
     */
    int available(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null ? context.pos - context.readPos : 0;
    }

    /**
     * Get the default buffer size. Can be overridden.
     *
     * @return {@link #DEFAULT_BUFFER_SIZE}
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }

    /**
     * Grows the (non-null) buffer of the context so that it holds at least {@code minCapacity} bytes.
     * The buffer is normally multiplied by {@link #DEFAULT_BUFFER_RESIZE_FACTOR}; if that is still smaller than
     * {@code minCapacity}, the buffer grows to exactly {@code minCapacity} instead. Existing content is preserved.
     *
     * @param context the context to be used; its buffer must not be {@code null}
     * @param minCapacity the minimum total capacity required, computed as a {@code long} so it cannot wrap
     * @return the new, larger buffer (also stored in {@code context.buffer})
     * @throws OutOfMemoryError if {@code minCapacity} exceeds the maximum possible array length
     */
    private static byte[] resizeBuffer(final Context context, final long minCapacity) {
        if (minCapacity > Integer.MAX_VALUE) {
            throw new OutOfMemoryError("Unable to allocate array size: " + minCapacity);
        }
        // Use long arithmetic so that doubling a large buffer cannot overflow into a negative size.
        long newCapacity = (long) context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR;
        if (newCapacity < minCapacity) {
            newCapacity = minCapacity;
        }
        if (newCapacity > MAX_BUFFER_SIZE) {
            // Do not over-allocate beyond the soft limit; only go past it if the caller truly needs it.
            newCapacity = Math.max(minCapacity, MAX_BUFFER_SIZE);
        }
        context.buffer = Arrays.copyOf(context.buffer, (int) newCapacity);
        return context.buffer;
    }

    /**
     * Ensure that the buffer has room for {@code size} bytes
     *
     * <p>
     * If the context has no buffer yet, one is allocated (at least {@link #getDefaultBufferSize()} bytes, or
     * {@code size} if that is larger) and the read/write positions are reset to zero. Otherwise the buffer is
     * enlarged when fewer than {@code size} bytes remain after {@code context.pos}.
     * </p>
     *
     * @param size minimum spare space required
     * @param context the context to be used
     * @return the buffer
     */
    protected byte[] ensureBufferSize(final int size, final Context context) {
        if (context.buffer == null) {
            context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
            context.pos = 0;
            context.readPos = 0;
            return context.buffer;
        }
        // Widen before adding: pos + size may exceed Integer.MAX_VALUE.
        final long required = (long) context.pos + size;
        if (required > context.buffer.length) {
            return resizeBuffer(context, required);
        }
        return context.buffer;
    }

    /**
     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
     * bytes. Returns how many bytes were actually extracted.
     * <p>
     * Package protected for access from I/O streams.
     *
     * @param b
     *            byte[] array to extract the buffered data into.
     * @param bPos
     *            position in byte[] array to start extraction at.
     * @param bAvail
     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
     * @param context
     *            the context to be used
     * @return The number of bytes successfully extracted into the provided byte[] array. When nothing is buffered,
     *         returns {@link #EOF} if the context has reached EOF and 0 otherwise.
     */
    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
        if (context.buffer != null) {
            final int len = Math.min(available(context), bAvail);
            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
            context.readPos += len;
            if (context.readPos >= context.pos) {
                context.buffer = null; // so hasData() will return false, and this method can return -1
            }
            return len;
        }
        return context.eof ? EOF : 0;
    }

    /**
     * Checks if a byte value is whitespace or not.
     * Whitespace is taken to mean: space, tab, CR, LF
     * @param byteToCheck
     *            the byte to check
     * @return true if byte is whitespace, false otherwise
     */
    protected static boolean isWhiteSpace(final byte byteToCheck) {
        switch (byteToCheck) {
            case ' ' :
            case '\n' :
            case '\r' :
            case '\t' :
                return true;
            default :
                return false;
        }
    }

    /**
     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
     *
     * @param obj
     *            Object to encode
     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type byte[]
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof byte[])) {
            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
        }
        return encode((byte[]) obj);
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
     * Uses UTF8 encoding.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A String containing only Base-N character data
     */
    public String encodeToString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
     * Uses UTF8 encoding.
     * <p>
     * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
     * </p>
     *
     * @param pArray a byte array containing binary data
     * @return String containing only character data in the appropriate alphabet.
     * @since 1.5
     */
    public String encodeAsString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
     *
     * @param obj
     *            Object to decode
     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
     *         supplied.
     * @throws DecoderException
     *             if the parameter supplied is not of type byte[] or String
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
        }
    }

    /**
     * Decodes a String containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A String containing Base-N character data
     * @return a byte array containing binary data
     */
    public byte[] decode(final String pArray) {
        return decode(StringUtils.getBytesUtf8(pArray));
    }

    /**
     * Decodes a byte[] containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A byte array containing Base-N character data
     * @return a byte array containing binary data; a {@code null} or empty input is returned as-is
     */
    @Override
    public byte[] decode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        decode(pArray, 0, pArray.length, context);
        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
        final byte[] result = new byte[context.pos];
        readResults(result, 0, result.length, context);
        return result;
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A byte array containing only the base N alphabetic character data; a {@code null} or empty input is
     *         returned as-is
     */
    @Override
    public byte[] encode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        return encode(pArray, 0, pArray.length);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @param offset
     *            initial offset of the subarray.
     * @param length
     *            length of the subarray.
     * @return A byte array containing only the base N alphabetic character data; a {@code null} or empty
     *         {@code pArray} is returned as-is
     * @since 1.11
     */
    public byte[] encode(final byte[] pArray, final int offset, final int length) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        encode(pArray, offset, length, context);
        encode(pArray, offset, EOF, context); // Notify encoder of EOF.
        final byte[] buf = new byte[context.pos - context.readPos];
        readResults(buf, 0, buf.length, context);
        return buf;
    }

    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);

    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);

    /**
     * Returns whether or not the {@code octet} is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

    /**
     * Tests a given byte array to see if it contains only valid characters within the alphabet.
     * The method optionally treats whitespace and pad as valid.
     *
     * @param arrayOctet byte array to test
     * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
     *
     * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
     *         {@code false}, otherwise
     */
    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
        for (final byte octet : arrayOctet) {
            // A non-alphabet byte is only tolerated when allowWSPad is set and it is the pad byte or whitespace.
            if (!isInAlphabet(octet) &&
                    (!allowWSPad || ((octet != pad) && !isWhiteSpace(octet)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a given String to see if it contains only valid characters within the alphabet.
     * The method treats whitespace and PAD as valid.
     *
     * @param basen String to test
     * @return {@code true} if all characters in the String are valid characters in the alphabet or if
     *         the String is empty; {@code false}, otherwise
     * @see #isInAlphabet(byte[], boolean)
     */
    public boolean isInAlphabet(final String basen) {
        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
    }

    /**
     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
     *
     * Intended for use in checking line-ending arrays
     *
     * @param arrayOctet
     *            byte array to test
     * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
     *         (including when the array is {@code null})
     */
    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
        if (arrayOctet == null) {
            return false;
        }
        for (final byte element : arrayOctet) {
            if (pad == element || isInAlphabet(element)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Calculates the amount of space needed to encode the supplied array.
     *
     * @param pArray byte[] array which will later be encoded
     *
     * @return amount of space needed to encoded the supplied array.
     * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
     */
    public long getEncodedLength(final byte[] pArray) {
        // Calculate non-chunked size - rounded up to allow for padding.
        // Widen to long BEFORE adding so that a very large array length cannot overflow the int addition.
        long len = ((pArray.length + (long) unencodedBlockSize - 1) / unencodedBlockSize) * encodedBlockSize;
        if (lineLength > 0) { // We're using chunking
            // Round up to nearest multiple
            len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
        }
        return len;
    }
}