1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.util.Arrays;
21
22 import org.apache.commons.codec.BinaryDecoder;
23 import org.apache.commons.codec.BinaryEncoder;
24 import org.apache.commons.codec.DecoderException;
25 import org.apache.commons.codec.EncoderException;
26
27 /**
28 * Abstract superclass for Base-N encoders and decoders.
29 *
30 * <p>
31 * This class is thread-safe.
32 * </p>
33 *
34 * @version $Id: BaseNCodec.java 1811344 2017-10-06 15:19:57Z ggregory $
35 */
36 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
37
38 /**
39 * Holds thread context so classes can be thread-safe.
40 *
41 * This class is not itself thread-safe; each thread must allocate its own copy.
42 *
43 * @since 1.7
44 */
45 static class Context {
46
47 /**
48 * Place holder for the bytes we're dealing with for our based logic.
49 * Bitwise operations store and extract the encoding or decoding from this variable.
50 */
51 int ibitWorkArea;
52
53 /**
54 * Place holder for the bytes we're dealing with for our based logic.
55 * Bitwise operations store and extract the encoding or decoding from this variable.
56 */
57 long lbitWorkArea;
58
59 /**
60 * Buffer for streaming.
61 */
62 byte[] buffer;
63
64 /**
65 * Position where next character should be written in the buffer.
66 */
67 int pos;
68
69 /**
70 * Position where next character should be read from the buffer.
71 */
72 int readPos;
73
74 /**
75 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
76 * and must be thrown away.
77 */
78 boolean eof;
79
80 /**
81 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
82 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
83 */
84 int currentLinePos;
85
86 /**
87 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
88 * variable helps track that.
89 */
90 int modulus;
91
92 Context() {
93 }
94
95 /**
96 * Returns a String useful for debugging (especially within a debugger.)
97 *
98 * @return a String useful for debugging.
99 */
100 @SuppressWarnings("boxing") // OK to ignore boxing here
101 @Override
102 public String toString() {
103 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
104 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
105 currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
106 }
107 }
108
/**
 * EOF marker. The single-shot encode and decode methods of this class pass it as the <code>length</code>
 * argument to signal the end of the input, and <code>readResults</code> returns it when the context has
 * reached EOF and holds no buffered data.
 *
 * @since 1.7
 */
static final int EOF = -1;

/**
 * MIME chunk size per RFC 2045 section 6.8.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
 */
public static final int MIME_CHUNK_SIZE = 76;

/**
 * PEM chunk size per RFC 1421 section 4.3.2.4.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
 */
public static final int PEM_CHUNK_SIZE = 64;

/** Factor by which the length of an existing buffer is multiplied when the buffer has to grow. */
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

/**
 * Defines the default buffer size - currently {@value}
 * - must be large enough for at least one encoded block+separator
 */
private static final int DEFAULT_BUFFER_SIZE = 8192;

/** Mask used to extract 8 bits, used in decoding bytes */
protected static final int MASK_8BITS = 0xff;

/**
 * Byte used to pad output.
 */
protected static final byte PAD_DEFAULT = '='; // Allow static access to default

/**
 * @deprecated Use {@link #pad}. Will be removed in 2.0.
 */
@Deprecated
protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

/** Padding byte of this instance; assigned in the constructor. */
protected final byte pad; // instance variable just in case it needs to vary later

/** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
private final int unencodedBlockSize;

/** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
private final int encodedBlockSize;

/**
 * Chunksize for encoding. Not used when decoding.
 * A value of zero or less implies no chunking of the encoded data.
 * Rounded down to nearest multiple of encodedBlockSize.
 */
protected final int lineLength;

/**
 * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
 */
private final int chunkSeparatorLength;
181
/**
 * Creates a codec that pads with {@link #PAD_DEFAULT}; delegates to the five-argument constructor.
 * <p>
 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
 * </p>
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
final int lineLength, final int chunkSeparatorLength) {
this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
}
194
/**
 * Creates a codec with the given block sizes, chunking parameters and padding byte.
 * <p>
 * Chunking is enabled only when both <code>lineLength</code> and <code>chunkSeparatorLength</code> are
 * greater than zero. In that case <code>lineLength</code> is rounded down to the nearest multiple of
 * {@link #encodedBlockSize}; otherwise the effective line length is zero.
 * </p>
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 * @param pad byte used as padding byte.
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
        final int lineLength, final int chunkSeparatorLength, final byte pad) {
    this.unencodedBlockSize = unencodedBlockSize;
    this.encodedBlockSize = encodedBlockSize;
    if (lineLength > 0 && chunkSeparatorLength > 0) {
        // Drop the remainder so that a line always holds a whole number of encoded blocks.
        this.lineLength = lineLength - (lineLength % encodedBlockSize);
    } else {
        this.lineLength = 0;
    }
    this.chunkSeparatorLength = chunkSeparatorLength;

    this.pad = pad;
}
214
215 /**
216 * Returns true if this object has buffered data for reading.
217 *
218 * @param context the context to be used
219 * @return true if there is data still available for reading.
220 */
221 boolean hasData(final Context context) { // package protected for access from I/O streams
222 return context.buffer != null;
223 }
224
225 /**
226 * Returns the amount of buffered data available for reading.
227 *
228 * @param context the context to be used
229 * @return The amount of buffered data available for reading.
230 */
231 int available(final Context context) { // package protected for access from I/O streams
232 return context.buffer != null ? context.pos - context.readPos : 0;
233 }
234
/**
 * Get the default buffer size. Can be overridden.
 * <p>
 * The value is used when a context's buffer is allocated for the first time.
 * </p>
 *
 * @return {@link #DEFAULT_BUFFER_SIZE}
 */
protected int getDefaultBufferSize() {
return DEFAULT_BUFFER_SIZE;
}
243
244 /**
245 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
246 * @param context the context to be used
247 */
248 private byte[] resizeBuffer(final Context context) {
249 if (context.buffer == null) {
250 context.buffer = new byte[getDefaultBufferSize()];
251 context.pos = 0;
252 context.readPos = 0;
253 } else {
254 final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
255 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
256 context.buffer = b;
257 }
258 return context.buffer;
259 }
260
261 /**
262 * Ensure that the buffer has room for <code>size</code> bytes
263 *
264 * @param size minimum spare space required
265 * @param context the context to be used
266 * @return the buffer
267 */
268 protected byte[] ensureBufferSize(final int size, final Context context){
269 if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
270 return resizeBuffer(context);
271 }
272 return context.buffer;
273 }
274
275 /**
276 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
277 * bytes. Returns how many bytes were actually extracted.
278 * <p>
279 * Package protected for access from I/O streams.
280 *
281 * @param b
282 * byte[] array to extract the buffered data into.
283 * @param bPos
284 * position in byte[] array to start extraction at.
285 * @param bAvail
286 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
287 * @param context
288 * the context to be used
289 * @return The number of bytes successfully extracted into the provided byte[] array.
290 */
291 int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
292 if (context.buffer != null) {
293 final int len = Math.min(available(context), bAvail);
294 System.arraycopy(context.buffer, context.readPos, b, bPos, len);
295 context.readPos += len;
296 if (context.readPos >= context.pos) {
297 context.buffer = null; // so hasData() will return false, and this method can return -1
298 }
299 return len;
300 }
301 return context.eof ? EOF : 0;
302 }
303
304 /**
305 * Checks if a byte value is whitespace or not.
306 * Whitespace is taken to mean: space, tab, CR, LF
307 * @param byteToCheck
308 * the byte to check
309 * @return true if byte is whitespace, false otherwise
310 */
311 protected static boolean isWhiteSpace(final byte byteToCheck) {
312 switch (byteToCheck) {
313 case ' ' :
314 case '\n' :
315 case '\r' :
316 case '\t' :
317 return true;
318 default :
319 return false;
320 }
321 }
322
323 /**
324 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
325 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
326 *
327 * @param obj
328 * Object to encode
329 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
330 * @throws EncoderException
331 * if the parameter supplied is not of type byte[]
332 */
333 @Override
334 public Object encode(final Object obj) throws EncoderException {
335 if (!(obj instanceof byte[])) {
336 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
337 }
338 return encode((byte[]) obj);
339 }
340
341 /**
342 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
343 * Uses UTF8 encoding.
344 *
345 * @param pArray
346 * a byte array containing binary data
347 * @return A String containing only Base-N character data
348 */
349 public String encodeToString(final byte[] pArray) {
350 return StringUtils.newStringUtf8(encode(pArray));
351 }
352
/**
 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
 * Uses UTF8 encoding.
 * <p>
 * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
 * </p>
 *
 * @param pArray a byte array containing binary data
 * @return String containing only character data in the appropriate alphabet.
 * @since 1.5
 */
public String encodeAsString(final byte[] pArray){
return StringUtils.newStringUtf8(encode(pArray));
}
365
366 /**
367 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
368 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
369 *
370 * @param obj
371 * Object to decode
372 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
373 * supplied.
374 * @throws DecoderException
375 * if the parameter supplied is not of type byte[]
376 */
377 @Override
378 public Object decode(final Object obj) throws DecoderException {
379 if (obj instanceof byte[]) {
380 return decode((byte[]) obj);
381 } else if (obj instanceof String) {
382 return decode((String) obj);
383 } else {
384 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
385 }
386 }
387
388 /**
389 * Decodes a String containing characters in the Base-N alphabet.
390 *
391 * @param pArray
392 * A String containing Base-N character data
393 * @return a byte array containing binary data
394 */
395 public byte[] decode(final String pArray) {
396 return decode(StringUtils.getBytesUtf8(pArray));
397 }
398
399 /**
400 * Decodes a byte[] containing characters in the Base-N alphabet.
401 *
402 * @param pArray
403 * A byte array containing Base-N character data
404 * @return a byte array containing binary data
405 */
406 @Override
407 public byte[] decode(final byte[] pArray) {
408 if (pArray == null || pArray.length == 0) {
409 return pArray;
410 }
411 final Context context = new Context();
412 decode(pArray, 0, pArray.length, context);
413 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
414 final byte[] result = new byte[context.pos];
415 readResults(result, 0, result.length, context);
416 return result;
417 }
418
419 /**
420 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
421 *
422 * @param pArray
423 * a byte array containing binary data
424 * @return A byte array containing only the base N alphabetic character data
425 */
426 @Override
427 public byte[] encode(final byte[] pArray) {
428 if (pArray == null || pArray.length == 0) {
429 return pArray;
430 }
431 return encode(pArray, 0, pArray.length);
432 }
433
434 /**
435 * Encodes a byte[] containing binary data, into a byte[] containing
436 * characters in the alphabet.
437 *
438 * @param pArray
439 * a byte array containing binary data
440 * @param offset
441 * initial offset of the subarray.
442 * @param length
443 * length of the subarray.
444 * @return A byte array containing only the base N alphabetic character data
445 * @since 1.11
446 */
447 public byte[] encode(final byte[] pArray, final int offset, final int length) {
448 if (pArray == null || pArray.length == 0) {
449 return pArray;
450 }
451 final Context context = new Context();
452 encode(pArray, offset, length, context);
453 encode(pArray, offset, EOF, context); // Notify encoder of EOF.
454 final byte[] buf = new byte[context.pos - context.readPos];
455 readResults(buf, 0, buf.length, context);
456 return buf;
457 }
458
/**
 * Encodes <code>length</code> bytes of <code>pArray</code>, starting at index <code>i</code>, using
 * <code>context</code> to carry state between calls.
 * <p>
 * The single-shot <code>encode</code> methods of this class call it once with the data and then once more
 * with a <code>length</code> of {@link #EOF} to signal that no more input follows; afterwards they read the
 * output from the context via {@link #readResults(byte[], int, int, Context)}.
 * </p>
 *
 * @param pArray the binary data to encode
 * @param i index of the first byte to encode
 * @param length number of bytes to encode, or {@link #EOF} to signal the end of the input
 * @param context the context to be used
 */
// package protected for access from I/O streams
abstract void encode(byte[] pArray, int i, int length, Context context);

/**
 * Decodes <code>length</code> bytes of <code>pArray</code>, starting at index <code>i</code>, using
 * <code>context</code> to carry state between calls.
 * <p>
 * The single-shot {@link #decode(byte[])} method of this class calls it once with the data and then once more
 * with a <code>length</code> of {@link #EOF} to signal that no more input follows; afterwards it reads the
 * output from the context via {@link #readResults(byte[], int, int, Context)}.
 * </p>
 *
 * @param pArray the Base-N character data to decode
 * @param i index of the first byte to decode
 * @param length number of bytes to decode, or {@link #EOF} to signal the end of the input
 * @param context the context to be used
 */
// package protected for access from I/O streams
abstract void decode(byte[] pArray, int i, int length, Context context);
464
/**
 * Returns whether or not the <code>value</code> is in the current alphabet.
 * Does not allow whitespace or pad.
 *
 * @param value The value to test
 *
 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
 */
protected abstract boolean isInAlphabet(byte value);
474
475 /**
476 * Tests a given byte array to see if it contains only valid characters within the alphabet.
477 * The method optionally treats whitespace and pad as valid.
478 *
479 * @param arrayOctet byte array to test
480 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
481 *
482 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
483 * <code>false</code>, otherwise
484 */
485 public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
486 for (final byte octet : arrayOctet) {
487 if (!isInAlphabet(octet) &&
488 (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
489 return false;
490 }
491 }
492 return true;
493 }
494
495 /**
496 * Tests a given String to see if it contains only valid characters within the alphabet.
497 * The method treats whitespace and PAD as valid.
498 *
499 * @param basen String to test
500 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
501 * the String is empty; <code>false</code>, otherwise
502 * @see #isInAlphabet(byte[], boolean)
503 */
504 public boolean isInAlphabet(final String basen) {
505 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
506 }
507
508 /**
509 * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
510 *
511 * Intended for use in checking line-ending arrays
512 *
513 * @param arrayOctet
514 * byte array to test
515 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
516 */
517 protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
518 if (arrayOctet == null) {
519 return false;
520 }
521 for (final byte element : arrayOctet) {
522 if (pad == element || isInAlphabet(element)) {
523 return true;
524 }
525 }
526 return false;
527 }
528
529 /**
530 * Calculates the amount of space needed to encode the supplied array.
531 *
532 * @param pArray byte[] array which will later be encoded
533 *
534 * @return amount of space needed to encoded the supplied array.
535 * Returns a long since a max-len array will require > Integer.MAX_VALUE
536 */
537 public long getEncodedLength(final byte[] pArray) {
538 // Calculate non-chunked size - rounded up to allow for padding
539 // cast to long is needed to avoid possibility of overflow
540 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize;
541 if (lineLength > 0) { // We're using chunking
542 // Round up to nearest multiple
543 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
544 }
545 return len;
546 }
547 }