001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.util.Arrays; 021 022import org.apache.commons.codec.BinaryDecoder; 023import org.apache.commons.codec.BinaryEncoder; 024import org.apache.commons.codec.DecoderException; 025import org.apache.commons.codec.EncoderException; 026 027/** 028 * Abstract superclass for Base-N encoders and decoders. 029 * 030 * <p> 031 * This class is thread-safe. 032 * </p> 033 * 034 * @version $Id: BaseNCodec.html 928559 2014-11-10 02:53:54Z ggregory $ 035 */ 036public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 037 038 /** 039 * Holds thread context so classes can be thread-safe. 040 * 041 * This class is not itself thread-safe; each thread must allocate its own copy. 042 * 043 * @since 1.7 044 */ 045 static class Context { 046 047 /** 048 * Place holder for the bytes we're dealing with for our based logic. 049 * Bitwise operations store and extract the encoding or decoding from this variable. 050 */ 051 int ibitWorkArea; 052 053 /** 054 * Place holder for the bytes we're dealing with for our based logic. 055 * Bitwise operations store and extract the encoding or decoding from this variable. 056 */ 057 long lbitWorkArea; 058 059 /** 060 * Buffer for streaming. 061 */ 062 byte[] buffer; 063 064 /** 065 * Position where next character should be written in the buffer. 066 */ 067 int pos; 068 069 /** 070 * Position where next character should be read from the buffer. 071 */ 072 int readPos; 073 074 /** 075 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 076 * and must be thrown away. 077 */ 078 boolean eof; 079 080 /** 081 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use 082 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). 083 */ 084 int currentLinePos; 085 086 /** 087 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This 088 * variable helps track that. 089 */ 090 int modulus; 091 092 Context() { 093 } 094 095 /** 096 * Returns a String useful for debugging (especially within a debugger.) 097 * 098 * @return a String useful for debugging. 099 */ 100 @SuppressWarnings("boxing") // OK to ignore boxing here 101 @Override 102 public String toString() { 103 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + 104 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), 105 currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); 106 } 107 } 108 109 /** 110 * EOF 111 * 112 * @since 1.7 113 */ 114 static final int EOF = -1; 115 116 /** 117 * MIME chunk size per RFC 2045 section 6.8. 118 * 119 * <p> 120 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 121 * equal signs. 122 * </p> 123 * 124 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 125 */ 126 public static final int MIME_CHUNK_SIZE = 76; 127 128 /** 129 * PEM chunk size per RFC 1421 section 4.3.2.4. 130 * 131 * <p> 132 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 133 * equal signs. 134 * </p> 135 * 136 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 137 */ 138 public static final int PEM_CHUNK_SIZE = 64; 139 140 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 141 142 /** 143 * Defines the default buffer size - currently {@value} 144 * - must be large enough for at least one encoded block+separator 145 */ 146 private static final int DEFAULT_BUFFER_SIZE = 8192; 147 148 /** Mask used to extract 8 bits, used in decoding bytes */ 149 protected static final int MASK_8BITS = 0xff; 150 151 /** 152 * Byte used to pad output. 153 */ 154 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 155 156 /** 157 * @deprecated Use {@link #pad}. Will be removed in 2.0. 158 */ 159 @Deprecated 160 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 161 162 protected final byte pad; // instance variable just in case it needs to vary later 163 164 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 165 private final int unencodedBlockSize; 166 167 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 168 private final int encodedBlockSize; 169 170 /** 171 * Chunksize for encoding. Not used when decoding. 172 * A value of zero or less implies no chunking of the encoded data. 173 * Rounded down to nearest multiple of encodedBlockSize. 174 */ 175 protected final int lineLength; 176 177 /** 178 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 179 */ 180 private final int chunkSeparatorLength; 181 182 /** 183 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} 184 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. 185 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 186 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 187 * @param lineLength if > 0, use chunking with a length <code>lineLength</code> 188 * @param chunkSeparatorLength the chunk separator length, if relevant 189 */ 190 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, 191 final int lineLength, final int chunkSeparatorLength) { 192 this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT); 193 } 194 195 /** 196 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} 197 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. 198 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 199 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 200 * @param lineLength if > 0, use chunking with a length <code>lineLength</code> 201 * @param chunkSeparatorLength the chunk separator length, if relevant 202 * @param pad byte used as padding byte. 203 */ 204 protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize, 205 final int lineLength, final int chunkSeparatorLength, final byte pad) { 206 this.unencodedBlockSize = unencodedBlockSize; 207 this.encodedBlockSize = encodedBlockSize; 208 final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; 209 this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 210 this.chunkSeparatorLength = chunkSeparatorLength; 211 212 this.pad = pad; 213 } 214 215 /** 216 * Returns true if this object has buffered data for reading. 217 * 218 * @param context the context to be used 219 * @return true if there is data still available for reading. 220 */ 221 boolean hasData(final Context context) { // package protected for access from I/O streams 222 return context.buffer != null; 223 } 224 225 /** 226 * Returns the amount of buffered data available for reading. 227 * 228 * @param context the context to be used 229 * @return The amount of buffered data available for reading. 230 */ 231 int available(final Context context) { // package protected for access from I/O streams 232 return context.buffer != null ? context.pos - context.readPos : 0; 233 } 234 235 /** 236 * Get the default buffer size. Can be overridden. 237 * 238 * @return {@link #DEFAULT_BUFFER_SIZE} 239 */ 240 protected int getDefaultBufferSize() { 241 return DEFAULT_BUFFER_SIZE; 242 } 243 244 /** 245 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. 246 * @param context the context to be used 247 */ 248 private byte[] resizeBuffer(final Context context) { 249 if (context.buffer == null) { 250 context.buffer = new byte[getDefaultBufferSize()]; 251 context.pos = 0; 252 context.readPos = 0; 253 } else { 254 final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 255 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); 256 context.buffer = b; 257 } 258 return context.buffer; 259 } 260 261 /** 262 * Ensure that the buffer has room for <code>size</code> bytes 263 * 264 * @param size minimum spare space required 265 * @param context the context to be used 266 * @return the buffer 267 */ 268 protected byte[] ensureBufferSize(final int size, final Context context){ 269 if ((context.buffer == null) || (context.buffer.length < context.pos + size)){ 270 return resizeBuffer(context); 271 } 272 return context.buffer; 273 } 274 275 /** 276 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 277 * bytes. Returns how many bytes were actually extracted. 278 * <p> 279 * Package protected for access from I/O streams. 280 * 281 * @param b 282 * byte[] array to extract the buffered data into. 283 * @param bPos 284 * position in byte[] array to start extraction at. 285 * @param bAvail 286 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 287 * @param context 288 * the context to be used 289 * @return The number of bytes successfully extracted into the provided byte[] array. 290 */ 291 int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { 292 if (context.buffer != null) { 293 final int len = Math.min(available(context), bAvail); 294 System.arraycopy(context.buffer, context.readPos, b, bPos, len); 295 context.readPos += len; 296 if (context.readPos >= context.pos) { 297 context.buffer = null; // so hasData() will return false, and this method can return -1 298 } 299 return len; 300 } 301 return context.eof ? EOF : 0; 302 } 303 304 /** 305 * Checks if a byte value is whitespace or not. 306 * Whitespace is taken to mean: space, tab, CR, LF 307 * @param byteToCheck 308 * the byte to check 309 * @return true if byte is whitespace, false otherwise 310 */ 311 protected static boolean isWhiteSpace(final byte byteToCheck) { 312 switch (byteToCheck) { 313 case ' ' : 314 case '\n' : 315 case '\r' : 316 case '\t' : 317 return true; 318 default : 319 return false; 320 } 321 } 322 323 /** 324 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 325 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 326 * 327 * @param obj 328 * Object to encode 329 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 330 * @throws EncoderException 331 * if the parameter supplied is not of type byte[] 332 */ 333 @Override 334 public Object encode(final Object obj) throws EncoderException { 335 if (!(obj instanceof byte[])) { 336 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 337 } 338 return encode((byte[]) obj); 339 } 340 341 /** 342 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 343 * Uses UTF8 encoding. 344 * 345 * @param pArray 346 * a byte array containing binary data 347 * @return A String containing only Base-N character data 348 */ 349 public String encodeToString(final byte[] pArray) { 350 return StringUtils.newStringUtf8(encode(pArray)); 351 } 352 353 /** 354 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 355 * Uses UTF8 encoding. 356 * 357 * @param pArray a byte array containing binary data 358 * @return String containing only character data in the appropriate alphabet. 359 */ 360 public String encodeAsString(final byte[] pArray){ 361 return StringUtils.newStringUtf8(encode(pArray)); 362 } 363 364 /** 365 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 366 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 367 * 368 * @param obj 369 * Object to decode 370 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String 371 * supplied. 372 * @throws DecoderException 373 * if the parameter supplied is not of type byte[] 374 */ 375 @Override 376 public Object decode(final Object obj) throws DecoderException { 377 if (obj instanceof byte[]) { 378 return decode((byte[]) obj); 379 } else if (obj instanceof String) { 380 return decode((String) obj); 381 } else { 382 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 383 } 384 } 385 386 /** 387 * Decodes a String containing characters in the Base-N alphabet. 388 * 389 * @param pArray 390 * A String containing Base-N character data 391 * @return a byte array containing binary data 392 */ 393 public byte[] decode(final String pArray) { 394 return decode(StringUtils.getBytesUtf8(pArray)); 395 } 396 397 /** 398 * Decodes a byte[] containing characters in the Base-N alphabet. 399 * 400 * @param pArray 401 * A byte array containing Base-N character data 402 * @return a byte array containing binary data 403 */ 404 @Override 405 public byte[] decode(final byte[] pArray) { 406 if (pArray == null || pArray.length == 0) { 407 return pArray; 408 } 409 final Context context = new Context(); 410 decode(pArray, 0, pArray.length, context); 411 decode(pArray, 0, EOF, context); // Notify decoder of EOF. 412 final byte[] result = new byte[context.pos]; 413 readResults(result, 0, result.length, context); 414 return result; 415 } 416 417 /** 418 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 419 * 420 * @param pArray 421 * a byte array containing binary data 422 * @return A byte array containing only the basen alphabetic character data 423 */ 424 @Override 425 public byte[] encode(final byte[] pArray) { 426 if (pArray == null || pArray.length == 0) { 427 return pArray; 428 } 429 final Context context = new Context(); 430 encode(pArray, 0, pArray.length, context); 431 encode(pArray, 0, EOF, context); // Notify encoder of EOF. 432 final byte[] buf = new byte[context.pos - context.readPos]; 433 readResults(buf, 0, buf.length, context); 434 return buf; 435 } 436 437 // package protected for access from I/O streams 438 abstract void encode(byte[] pArray, int i, int length, Context context); 439 440 // package protected for access from I/O streams 441 abstract void decode(byte[] pArray, int i, int length, Context context); 442 443 /** 444 * Returns whether or not the <code>octet</code> is in the current alphabet. 445 * Does not allow whitespace or pad. 446 * 447 * @param value The value to test 448 * 449 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise. 450 */ 451 protected abstract boolean isInAlphabet(byte value); 452 453 /** 454 * Tests a given byte array to see if it contains only valid characters within the alphabet. 455 * The method optionally treats whitespace and pad as valid. 456 * 457 * @param arrayOctet byte array to test 458 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed 459 * 460 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty; 461 * <code>false</code>, otherwise 462 */ 463 public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) { 464 for (int i = 0; i < arrayOctet.length; i++) { 465 if (!isInAlphabet(arrayOctet[i]) && 466 (!allowWSPad || (arrayOctet[i] != pad) && !isWhiteSpace(arrayOctet[i]))) { 467 return false; 468 } 469 } 470 return true; 471 } 472 473 /** 474 * Tests a given String to see if it contains only valid characters within the alphabet. 475 * The method treats whitespace and PAD as valid. 476 * 477 * @param basen String to test 478 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if 479 * the String is empty; <code>false</code>, otherwise 480 * @see #isInAlphabet(byte[], boolean) 481 */ 482 public boolean isInAlphabet(final String basen) { 483 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 484 } 485 486 /** 487 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 488 * 489 * Intended for use in checking line-ending arrays 490 * 491 * @param arrayOctet 492 * byte array to test 493 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise 494 */ 495 protected boolean containsAlphabetOrPad(final byte[] arrayOctet) { 496 if (arrayOctet == null) { 497 return false; 498 } 499 for (final byte element : arrayOctet) { 500 if (pad == element || isInAlphabet(element)) { 501 return true; 502 } 503 } 504 return false; 505 } 506 507 /** 508 * Calculates the amount of space needed to encode the supplied array. 509 * 510 * @param pArray byte[] array which will later be encoded 511 * 512 * @return amount of space needed to encoded the supplied array. 513 * Returns a long since a max-len array will require > Integer.MAX_VALUE 514 */ 515 public long getEncodedLength(final byte[] pArray) { 516 // Calculate non-chunked size - rounded up to allow for padding 517 // cast to long is needed to avoid possibility of overflow 518 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; 519 if (lineLength > 0) { // We're using chunking 520 // Round up to nearest multiple 521 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; 522 } 523 return len; 524 } 525}