001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import org.apache.commons.codec.BinaryDecoder;
021 import org.apache.commons.codec.BinaryEncoder;
022 import org.apache.commons.codec.DecoderException;
023 import org.apache.commons.codec.EncoderException;
024
025 /**
026 * Abstract superclass for Base-N encoders and decoders.
027 *
028 * <p>
029 * This class is thread-safe.
030 * </p>
031 *
032 * @version $Id: BaseNCodec.html 889935 2013-12-11 05:05:13Z ggregory $
033 */
034 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
035
036 /**
037 * Holds thread context so classes can be thread-safe.
038 *
039 * This class is not itself thread-safe; each thread must allocate its own copy.
040 *
041 * @since 1.7
042 */
043 static class Context {
044
045 /**
046 * Place holder for the bytes we're dealing with for our based logic.
047 * Bitwise operations store and extract the encoding or decoding from this variable.
048 */
049 int ibitWorkArea;
050
051 /**
052 * Place holder for the bytes we're dealing with for our based logic.
053 * Bitwise operations store and extract the encoding or decoding from this variable.
054 */
055 long lbitWorkArea;
056
057 /**
058 * Buffer for streaming.
059 */
060 byte[] buffer;
061
062 /**
063 * Position where next character should be written in the buffer.
064 */
065 int pos;
066
067 /**
068 * Position where next character should be read from the buffer.
069 */
070 int readPos;
071
072 /**
073 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
074 * and must be thrown away.
075 */
076 boolean eof;
077
078 /**
079 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
080 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
081 */
082 int currentLinePos;
083
084 /**
085 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
086 * variable helps track that.
087 */
088 int modulus;
089
090 Context() {
091 }
092
093 /**
094 * Returns a String useful for debugging (especially within a debugger.)
095 *
096 * @return a String useful for debugging.
097 */
098 @SuppressWarnings("boxing") // OK to ignore boxing here
099 @Override
100 public String toString() {
101 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
102 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), buffer, currentLinePos, eof,
103 ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
104 }
105 }
106
107 /**
108 * EOF
109 *
110 * @since 1.7
111 */
112 static final int EOF = -1;
113
114 /**
115 * MIME chunk size per RFC 2045 section 6.8.
116 *
117 * <p>
118 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
119 * equal signs.
120 * </p>
121 *
122 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
123 */
124 public static final int MIME_CHUNK_SIZE = 76;
125
126 /**
127 * PEM chunk size per RFC 1421 section 4.3.2.4.
128 *
129 * <p>
130 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
131 * equal signs.
132 * </p>
133 *
134 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
135 */
136 public static final int PEM_CHUNK_SIZE = 64;
137
138 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
139
140 /**
141 * Defines the default buffer size - currently {@value}
142 * - must be large enough for at least one encoded block+separator
143 */
144 private static final int DEFAULT_BUFFER_SIZE = 8192;
145
146 /** Mask used to extract 8 bits, used in decoding bytes */
147 protected static final int MASK_8BITS = 0xff;
148
149 /**
150 * Byte used to pad output.
151 */
152 protected static final byte PAD_DEFAULT = '='; // Allow static access to default
153
154 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
155
156 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */
157 private final int unencodedBlockSize;
158
159 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */
160 private final int encodedBlockSize;
161
162 /**
163 * Chunksize for encoding. Not used when decoding.
164 * A value of zero or less implies no chunking of the encoded data.
165 * Rounded down to nearest multiple of encodedBlockSize.
166 */
167 protected final int lineLength;
168
169 /**
170 * Size of chunk separator. Not used unless {@link #lineLength} > 0.
171 */
172 private final int chunkSeparatorLength;
173
174 /**
175 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
176 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
177 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
178 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
179 * @param lineLength if > 0, use chunking with a length <code>lineLength</code>
180 * @param chunkSeparatorLength the chunk separator length, if relevant
181 */
182 protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
183 this.unencodedBlockSize = unencodedBlockSize;
184 this.encodedBlockSize = encodedBlockSize;
185 final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
186 this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
187 this.chunkSeparatorLength = chunkSeparatorLength;
188 }
189
190 /**
191 * Returns true if this object has buffered data for reading.
192 *
193 * @param context the context to be used
194 * @return true if there is data still available for reading.
195 */
196 boolean hasData(Context context) { // package protected for access from I/O streams
197 return context.buffer != null;
198 }
199
200 /**
201 * Returns the amount of buffered data available for reading.
202 *
203 * @param context the context to be used
204 * @return The amount of buffered data available for reading.
205 */
206 int available(Context context) { // package protected for access from I/O streams
207 return context.buffer != null ? context.pos - context.readPos : 0;
208 }
209
210 /**
211 * Get the default buffer size. Can be overridden.
212 *
213 * @return {@link #DEFAULT_BUFFER_SIZE}
214 */
215 protected int getDefaultBufferSize() {
216 return DEFAULT_BUFFER_SIZE;
217 }
218
219 /**
220 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
221 * @param context the context to be used
222 */
223 private byte[] resizeBuffer(Context context) {
224 if (context.buffer == null) {
225 context.buffer = new byte[getDefaultBufferSize()];
226 context.pos = 0;
227 context.readPos = 0;
228 } else {
229 byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
230 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
231 context.buffer = b;
232 }
233 return context.buffer;
234 }
235
236 /**
237 * Ensure that the buffer has room for <code>size</code> bytes
238 *
239 * @param size minimum spare space required
240 * @param context the context to be used
241 */
242 protected byte[] ensureBufferSize(int size, Context context){
243 if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
244 return resizeBuffer(context);
245 }
246 return context.buffer;
247 }
248
249 /**
250 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
251 * bytes. Returns how many bytes were actually extracted.
252 * <p>
253 * Package protected for access from I/O streams.
254 *
255 * @param b
256 * byte[] array to extract the buffered data into.
257 * @param bPos
258 * position in byte[] array to start extraction at.
259 * @param bAvail
260 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
261 * @param context
262 * the context to be used
263 * @return The number of bytes successfully extracted into the provided byte[] array.
264 */
265 int readResults(byte[] b, int bPos, int bAvail, Context context) {
266 if (context.buffer != null) {
267 int len = Math.min(available(context), bAvail);
268 System.arraycopy(context.buffer, context.readPos, b, bPos, len);
269 context.readPos += len;
270 if (context.readPos >= context.pos) {
271 context.buffer = null; // so hasData() will return false, and this method can return -1
272 }
273 return len;
274 }
275 return context.eof ? EOF : 0;
276 }
277
278 /**
279 * Checks if a byte value is whitespace or not.
280 * Whitespace is taken to mean: space, tab, CR, LF
281 * @param byteToCheck
282 * the byte to check
283 * @return true if byte is whitespace, false otherwise
284 */
285 protected static boolean isWhiteSpace(byte byteToCheck) {
286 switch (byteToCheck) {
287 case ' ' :
288 case '\n' :
289 case '\r' :
290 case '\t' :
291 return true;
292 default :
293 return false;
294 }
295 }
296
297 /**
298 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
299 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
300 *
301 * @param obj
302 * Object to encode
303 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
304 * @throws EncoderException
305 * if the parameter supplied is not of type byte[]
306 */
307 @Override
308 public Object encode(Object obj) throws EncoderException {
309 if (!(obj instanceof byte[])) {
310 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
311 }
312 return encode((byte[]) obj);
313 }
314
315 /**
316 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
317 * Uses UTF8 encoding.
318 *
319 * @param pArray
320 * a byte array containing binary data
321 * @return A String containing only Base-N character data
322 */
323 public String encodeToString(byte[] pArray) {
324 return StringUtils.newStringUtf8(encode(pArray));
325 }
326
327 /**
328 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
329 * Uses UTF8 encoding.
330 *
331 * @param pArray a byte array containing binary data
332 * @return String containing only character data in the appropriate alphabet.
333 */
334 public String encodeAsString(byte[] pArray){
335 return StringUtils.newStringUtf8(encode(pArray));
336 }
337
338 /**
339 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
340 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
341 *
342 * @param obj
343 * Object to decode
344 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
345 * supplied.
346 * @throws DecoderException
347 * if the parameter supplied is not of type byte[]
348 */
349 @Override
350 public Object decode(Object obj) throws DecoderException {
351 if (obj instanceof byte[]) {
352 return decode((byte[]) obj);
353 } else if (obj instanceof String) {
354 return decode((String) obj);
355 } else {
356 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
357 }
358 }
359
360 /**
361 * Decodes a String containing characters in the Base-N alphabet.
362 *
363 * @param pArray
364 * A String containing Base-N character data
365 * @return a byte array containing binary data
366 */
367 public byte[] decode(String pArray) {
368 return decode(StringUtils.getBytesUtf8(pArray));
369 }
370
371 /**
372 * Decodes a byte[] containing characters in the Base-N alphabet.
373 *
374 * @param pArray
375 * A byte array containing Base-N character data
376 * @return a byte array containing binary data
377 */
378 @Override
379 public byte[] decode(byte[] pArray) {
380 if (pArray == null || pArray.length == 0) {
381 return pArray;
382 }
383 Context context = new Context();
384 decode(pArray, 0, pArray.length, context);
385 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
386 byte[] result = new byte[context.pos];
387 readResults(result, 0, result.length, context);
388 return result;
389 }
390
391 /**
392 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
393 *
394 * @param pArray
395 * a byte array containing binary data
396 * @return A byte array containing only the basen alphabetic character data
397 */
398 @Override
399 public byte[] encode(byte[] pArray) {
400 if (pArray == null || pArray.length == 0) {
401 return pArray;
402 }
403 Context context = new Context();
404 encode(pArray, 0, pArray.length, context);
405 encode(pArray, 0, EOF, context); // Notify encoder of EOF.
406 byte[] buf = new byte[context.pos - context.readPos];
407 readResults(buf, 0, buf.length, context);
408 return buf;
409 }
410
411 // package protected for access from I/O streams
412 abstract void encode(byte[] pArray, int i, int length, Context context);
413
414 // package protected for access from I/O streams
415 abstract void decode(byte[] pArray, int i, int length, Context context);
416
417 /**
418 * Returns whether or not the <code>octet</code> is in the current alphabet.
419 * Does not allow whitespace or pad.
420 *
421 * @param value The value to test
422 *
423 * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
424 */
425 protected abstract boolean isInAlphabet(byte value);
426
427 /**
428 * Tests a given byte array to see if it contains only valid characters within the alphabet.
429 * The method optionally treats whitespace and pad as valid.
430 *
431 * @param arrayOctet byte array to test
432 * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
433 *
434 * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
435 * {@code false}, otherwise
436 */
437 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
438 for (int i = 0; i < arrayOctet.length; i++) {
439 if (!isInAlphabet(arrayOctet[i]) &&
440 (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
441 return false;
442 }
443 }
444 return true;
445 }
446
447 /**
448 * Tests a given String to see if it contains only valid characters within the alphabet.
449 * The method treats whitespace and PAD as valid.
450 *
451 * @param basen String to test
452 * @return {@code true} if all characters in the String are valid characters in the alphabet or if
453 * the String is empty; {@code false}, otherwise
454 * @see #isInAlphabet(byte[], boolean)
455 */
456 public boolean isInAlphabet(String basen) {
457 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
458 }
459
460 /**
461 * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
462 *
463 * Intended for use in checking line-ending arrays
464 *
465 * @param arrayOctet
466 * byte array to test
467 * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
468 */
469 protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
470 if (arrayOctet == null) {
471 return false;
472 }
473 for (byte element : arrayOctet) {
474 if (PAD == element || isInAlphabet(element)) {
475 return true;
476 }
477 }
478 return false;
479 }
480
481 /**
482 * Calculates the amount of space needed to encode the supplied array.
483 *
484 * @param pArray byte[] array which will later be encoded
485 *
486 * @return amount of space needed to encoded the supplied array.
487 * Returns a long since a max-len array will require > Integer.MAX_VALUE
488 */
489 public long getEncodedLength(byte[] pArray) {
490 // Calculate non-chunked size - rounded up to allow for padding
491 // cast to long is needed to avoid possibility of overflow
492 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize;
493 if (lineLength > 0) { // We're using chunking
494 // Round up to nearest multiple
495 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
496 }
497 return len;
498 }
499 }