001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 import org.apache.commons.codec.BinaryDecoder; 021 import org.apache.commons.codec.BinaryEncoder; 022 import org.apache.commons.codec.DecoderException; 023 import org.apache.commons.codec.EncoderException; 024 025 /** 026 * Abstract superclass for Base-N encoders and decoders. 027 * 028 * <p> 029 * This class is thread-safe. 030 * </p> 031 * 032 * @version $Id: BaseNCodec.html 889935 2013-12-11 05:05:13Z ggregory $ 033 */ 034 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 035 036 /** 037 * Holds thread context so classes can be thread-safe. 038 * 039 * This class is not itself thread-safe; each thread must allocate its own copy. 040 * 041 * @since 1.7 042 */ 043 static class Context { 044 045 /** 046 * Place holder for the bytes we're dealing with for our based logic. 047 * Bitwise operations store and extract the encoding or decoding from this variable. 048 */ 049 int ibitWorkArea; 050 051 /** 052 * Place holder for the bytes we're dealing with for our based logic. 053 * Bitwise operations store and extract the encoding or decoding from this variable. 054 */ 055 long lbitWorkArea; 056 057 /** 058 * Buffer for streaming. 059 */ 060 byte[] buffer; 061 062 /** 063 * Position where next character should be written in the buffer. 064 */ 065 int pos; 066 067 /** 068 * Position where next character should be read from the buffer. 069 */ 070 int readPos; 071 072 /** 073 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 074 * and must be thrown away. 075 */ 076 boolean eof; 077 078 /** 079 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use 080 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). 081 */ 082 int currentLinePos; 083 084 /** 085 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This 086 * variable helps track that. 087 */ 088 int modulus; 089 090 Context() { 091 } 092 093 /** 094 * Returns a String useful for debugging (especially within a debugger.) 095 * 096 * @return a String useful for debugging. 097 */ 098 @SuppressWarnings("boxing") // OK to ignore boxing here 099 @Override 100 public String toString() { 101 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + 102 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), buffer, currentLinePos, eof, 103 ibitWorkArea, lbitWorkArea, modulus, pos, readPos); 104 } 105 } 106 107 /** 108 * EOF 109 * 110 * @since 1.7 111 */ 112 static final int EOF = -1; 113 114 /** 115 * MIME chunk size per RFC 2045 section 6.8. 116 * 117 * <p> 118 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 119 * equal signs. 120 * </p> 121 * 122 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 123 */ 124 public static final int MIME_CHUNK_SIZE = 76; 125 126 /** 127 * PEM chunk size per RFC 1421 section 4.3.2.4. 128 * 129 * <p> 130 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 131 * equal signs. 132 * </p> 133 * 134 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 135 */ 136 public static final int PEM_CHUNK_SIZE = 64; 137 138 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 139 140 /** 141 * Defines the default buffer size - currently {@value} 142 * - must be large enough for at least one encoded block+separator 143 */ 144 private static final int DEFAULT_BUFFER_SIZE = 8192; 145 146 /** Mask used to extract 8 bits, used in decoding bytes */ 147 protected static final int MASK_8BITS = 0xff; 148 149 /** 150 * Byte used to pad output. 151 */ 152 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 153 154 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 155 156 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 157 private final int unencodedBlockSize; 158 159 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 160 private final int encodedBlockSize; 161 162 /** 163 * Chunksize for encoding. Not used when decoding. 164 * A value of zero or less implies no chunking of the encoded data. 165 * Rounded down to nearest multiple of encodedBlockSize. 166 */ 167 protected final int lineLength; 168 169 /** 170 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 171 */ 172 private final int chunkSeparatorLength; 173 174 /** 175 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} 176 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. 177 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 178 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 179 * @param lineLength if > 0, use chunking with a length <code>lineLength</code> 180 * @param chunkSeparatorLength the chunk separator length, if relevant 181 */ 182 protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) { 183 this.unencodedBlockSize = unencodedBlockSize; 184 this.encodedBlockSize = encodedBlockSize; 185 final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; 186 this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 187 this.chunkSeparatorLength = chunkSeparatorLength; 188 } 189 190 /** 191 * Returns true if this object has buffered data for reading. 192 * 193 * @param context the context to be used 194 * @return true if there is data still available for reading. 195 */ 196 boolean hasData(Context context) { // package protected for access from I/O streams 197 return context.buffer != null; 198 } 199 200 /** 201 * Returns the amount of buffered data available for reading. 202 * 203 * @param context the context to be used 204 * @return The amount of buffered data available for reading. 205 */ 206 int available(Context context) { // package protected for access from I/O streams 207 return context.buffer != null ? context.pos - context.readPos : 0; 208 } 209 210 /** 211 * Get the default buffer size. Can be overridden. 212 * 213 * @return {@link #DEFAULT_BUFFER_SIZE} 214 */ 215 protected int getDefaultBufferSize() { 216 return DEFAULT_BUFFER_SIZE; 217 } 218 219 /** 220 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. 221 * @param context the context to be used 222 */ 223 private byte[] resizeBuffer(Context context) { 224 if (context.buffer == null) { 225 context.buffer = new byte[getDefaultBufferSize()]; 226 context.pos = 0; 227 context.readPos = 0; 228 } else { 229 byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 230 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); 231 context.buffer = b; 232 } 233 return context.buffer; 234 } 235 236 /** 237 * Ensure that the buffer has room for <code>size</code> bytes 238 * 239 * @param size minimum spare space required 240 * @param context the context to be used 241 */ 242 protected byte[] ensureBufferSize(int size, Context context){ 243 if ((context.buffer == null) || (context.buffer.length < context.pos + size)){ 244 return resizeBuffer(context); 245 } 246 return context.buffer; 247 } 248 249 /** 250 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 251 * bytes. Returns how many bytes were actually extracted. 252 * <p> 253 * Package protected for access from I/O streams. 254 * 255 * @param b 256 * byte[] array to extract the buffered data into. 257 * @param bPos 258 * position in byte[] array to start extraction at. 259 * @param bAvail 260 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 261 * @param context 262 * the context to be used 263 * @return The number of bytes successfully extracted into the provided byte[] array. 264 */ 265 int readResults(byte[] b, int bPos, int bAvail, Context context) { 266 if (context.buffer != null) { 267 int len = Math.min(available(context), bAvail); 268 System.arraycopy(context.buffer, context.readPos, b, bPos, len); 269 context.readPos += len; 270 if (context.readPos >= context.pos) { 271 context.buffer = null; // so hasData() will return false, and this method can return -1 272 } 273 return len; 274 } 275 return context.eof ? EOF : 0; 276 } 277 278 /** 279 * Checks if a byte value is whitespace or not. 280 * Whitespace is taken to mean: space, tab, CR, LF 281 * @param byteToCheck 282 * the byte to check 283 * @return true if byte is whitespace, false otherwise 284 */ 285 protected static boolean isWhiteSpace(byte byteToCheck) { 286 switch (byteToCheck) { 287 case ' ' : 288 case '\n' : 289 case '\r' : 290 case '\t' : 291 return true; 292 default : 293 return false; 294 } 295 } 296 297 /** 298 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 299 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 300 * 301 * @param obj 302 * Object to encode 303 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 304 * @throws EncoderException 305 * if the parameter supplied is not of type byte[] 306 */ 307 @Override 308 public Object encode(Object obj) throws EncoderException { 309 if (!(obj instanceof byte[])) { 310 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 311 } 312 return encode((byte[]) obj); 313 } 314 315 /** 316 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 317 * Uses UTF8 encoding. 318 * 319 * @param pArray 320 * a byte array containing binary data 321 * @return A String containing only Base-N character data 322 */ 323 public String encodeToString(byte[] pArray) { 324 return StringUtils.newStringUtf8(encode(pArray)); 325 } 326 327 /** 328 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 329 * Uses UTF8 encoding. 330 * 331 * @param pArray a byte array containing binary data 332 * @return String containing only character data in the appropriate alphabet. 333 */ 334 public String encodeAsString(byte[] pArray){ 335 return StringUtils.newStringUtf8(encode(pArray)); 336 } 337 338 /** 339 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of 340 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 341 * 342 * @param obj 343 * Object to decode 344 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String 345 * supplied. 346 * @throws DecoderException 347 * if the parameter supplied is not of type byte[] 348 */ 349 @Override 350 public Object decode(Object obj) throws DecoderException { 351 if (obj instanceof byte[]) { 352 return decode((byte[]) obj); 353 } else if (obj instanceof String) { 354 return decode((String) obj); 355 } else { 356 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 357 } 358 } 359 360 /** 361 * Decodes a String containing characters in the Base-N alphabet. 362 * 363 * @param pArray 364 * A String containing Base-N character data 365 * @return a byte array containing binary data 366 */ 367 public byte[] decode(String pArray) { 368 return decode(StringUtils.getBytesUtf8(pArray)); 369 } 370 371 /** 372 * Decodes a byte[] containing characters in the Base-N alphabet. 373 * 374 * @param pArray 375 * A byte array containing Base-N character data 376 * @return a byte array containing binary data 377 */ 378 @Override 379 public byte[] decode(byte[] pArray) { 380 if (pArray == null || pArray.length == 0) { 381 return pArray; 382 } 383 Context context = new Context(); 384 decode(pArray, 0, pArray.length, context); 385 decode(pArray, 0, EOF, context); // Notify decoder of EOF. 386 byte[] result = new byte[context.pos]; 387 readResults(result, 0, result.length, context); 388 return result; 389 } 390 391 /** 392 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 393 * 394 * @param pArray 395 * a byte array containing binary data 396 * @return A byte array containing only the basen alphabetic character data 397 */ 398 @Override 399 public byte[] encode(byte[] pArray) { 400 if (pArray == null || pArray.length == 0) { 401 return pArray; 402 } 403 Context context = new Context(); 404 encode(pArray, 0, pArray.length, context); 405 encode(pArray, 0, EOF, context); // Notify encoder of EOF. 406 byte[] buf = new byte[context.pos - context.readPos]; 407 readResults(buf, 0, buf.length, context); 408 return buf; 409 } 410 411 // package protected for access from I/O streams 412 abstract void encode(byte[] pArray, int i, int length, Context context); 413 414 // package protected for access from I/O streams 415 abstract void decode(byte[] pArray, int i, int length, Context context); 416 417 /** 418 * Returns whether or not the <code>octet</code> is in the current alphabet. 419 * Does not allow whitespace or pad. 420 * 421 * @param value The value to test 422 * 423 * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise. 424 */ 425 protected abstract boolean isInAlphabet(byte value); 426 427 /** 428 * Tests a given byte array to see if it contains only valid characters within the alphabet. 429 * The method optionally treats whitespace and pad as valid. 430 * 431 * @param arrayOctet byte array to test 432 * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed 433 * 434 * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty; 435 * {@code false}, otherwise 436 */ 437 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) { 438 for (int i = 0; i < arrayOctet.length; i++) { 439 if (!isInAlphabet(arrayOctet[i]) && 440 (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { 441 return false; 442 } 443 } 444 return true; 445 } 446 447 /** 448 * Tests a given String to see if it contains only valid characters within the alphabet. 449 * The method treats whitespace and PAD as valid. 450 * 451 * @param basen String to test 452 * @return {@code true} if all characters in the String are valid characters in the alphabet or if 453 * the String is empty; {@code false}, otherwise 454 * @see #isInAlphabet(byte[], boolean) 455 */ 456 public boolean isInAlphabet(String basen) { 457 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 458 } 459 460 /** 461 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 462 * 463 * Intended for use in checking line-ending arrays 464 * 465 * @param arrayOctet 466 * byte array to test 467 * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise 468 */ 469 protected boolean containsAlphabetOrPad(byte[] arrayOctet) { 470 if (arrayOctet == null) { 471 return false; 472 } 473 for (byte element : arrayOctet) { 474 if (PAD == element || isInAlphabet(element)) { 475 return true; 476 } 477 } 478 return false; 479 } 480 481 /** 482 * Calculates the amount of space needed to encode the supplied array. 483 * 484 * @param pArray byte[] array which will later be encoded 485 * 486 * @return amount of space needed to encoded the supplied array. 487 * Returns a long since a max-len array will require > Integer.MAX_VALUE 488 */ 489 public long getEncodedLength(byte[] pArray) { 490 // Calculate non-chunked size - rounded up to allow for padding 491 // cast to long is needed to avoid possibility of overflow 492 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; 493 if (lineLength > 0) { // We're using chunking 494 // Round up to nearest multiple 495 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; 496 } 497 return len; 498 } 499 }