View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.util.Arrays;
21  
22  import org.apache.commons.codec.BinaryDecoder;
23  import org.apache.commons.codec.BinaryEncoder;
24  import org.apache.commons.codec.DecoderException;
25  import org.apache.commons.codec.EncoderException;
26  
27  /**
28   * Abstract superclass for Base-N encoders and decoders.
29   *
30   * <p>
31   * This class is thread-safe.
32   * </p>
33   *
34   * @version $Id: BaseNCodec.java 1811344 2017-10-06 15:19:57Z ggregory $
35   */
36  public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
37  
38      /**
39       * Holds thread context so classes can be thread-safe.
40       *
41       * This class is not itself thread-safe; each thread must allocate its own copy.
42       *
43       * @since 1.7
44       */
45      static class Context {
46  
47          /**
48           * Place holder for the bytes we're dealing with for our based logic.
49           * Bitwise operations store and extract the encoding or decoding from this variable.
50           */
51          int ibitWorkArea;
52  
53          /**
54           * Place holder for the bytes we're dealing with for our based logic.
55           * Bitwise operations store and extract the encoding or decoding from this variable.
56           */
57          long lbitWorkArea;
58  
59          /**
60           * Buffer for streaming.
61           */
62          byte[] buffer;
63  
64          /**
65           * Position where next character should be written in the buffer.
66           */
67          int pos;
68  
69          /**
70           * Position where next character should be read from the buffer.
71           */
72          int readPos;
73  
74          /**
75           * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
76           * and must be thrown away.
77           */
78          boolean eof;
79  
80          /**
81           * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
82           * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
83           */
84          int currentLinePos;
85  
86          /**
87           * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
88           * variable helps track that.
89           */
90          int modulus;
91  
92          Context() {
93          }
94  
95          /**
96           * Returns a String useful for debugging (especially within a debugger.)
97           *
98           * @return a String useful for debugging.
99           */
100         @SuppressWarnings("boxing") // OK to ignore boxing here
101         @Override
102         public String toString() {
103             return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
104                     "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
105                     currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
106         }
107     }
108 
    /**
     * End-of-input marker. It is passed as the length argument of the abstract encode/decode methods to
     * signal that no more input follows, and it is returned by {@code readResults} when the context has
     * reached EOF and holds no buffered data.
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     *  MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which an already allocated context buffer is enlarged when it is resized. */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Default byte used to pad output; used by the constructor that takes no explicit pad byte.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Byte used to pad output for this codec instance; assigned by the constructor. */
    protected final byte pad; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;
181 
    /**
     * Creates a codec that pads with {@link #PAD_DEFAULT}; all work is delegated to the five-argument
     * constructor.
     * <p>
     * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
     * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }
194 
195     /**
196      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
197      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
198      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
199      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
200      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
201      * @param chunkSeparatorLength the chunk separator length, if relevant
202      * @param pad byte used as padding byte.
203      */
204     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
205                          final int lineLength, final int chunkSeparatorLength, final byte pad) {
206         this.unencodedBlockSize = unencodedBlockSize;
207         this.encodedBlockSize = encodedBlockSize;
208         final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
209         this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
210         this.chunkSeparatorLength = chunkSeparatorLength;
211 
212         this.pad = pad;
213     }
214 
    /**
     * Returns true if this object has buffered data for reading.
     * <p>
     * The check is simply whether the context currently holds a buffer; {@code readResults} discards the
     * buffer once everything written to it has been read.
     * </p>
     *
     * @param context the context to be used
     * @return true if there is data still available for reading.
     */
    boolean hasData(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null;
    }
224 
225     /**
226      * Returns the amount of buffered data available for reading.
227      *
228      * @param context the context to be used
229      * @return The amount of buffered data available for reading.
230      */
231     int available(final Context context) {  // package protected for access from I/O streams
232         return context.buffer != null ? context.pos - context.readPos : 0;
233     }
234 
    /**
     * Get the default buffer size. Can be overridden.
     * <p>
     * The returned value is used as the length of the buffer allocated for a context that does not have
     * one yet.
     * </p>
     *
     * @return {@link #DEFAULT_BUFFER_SIZE}
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }
243 
244     /**
245      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
246      * @param context the context to be used
247      */
248     private byte[] resizeBuffer(final Context context) {
249         if (context.buffer == null) {
250             context.buffer = new byte[getDefaultBufferSize()];
251             context.pos = 0;
252             context.readPos = 0;
253         } else {
254             final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
255             System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
256             context.buffer = b;
257         }
258         return context.buffer;
259     }
260 
261     /**
262      * Ensure that the buffer has room for <code>size</code> bytes
263      *
264      * @param size minimum spare space required
265      * @param context the context to be used
266      * @return the buffer
267      */
268     protected byte[] ensureBufferSize(final int size, final Context context){
269         if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
270             return resizeBuffer(context);
271         }
272         return context.buffer;
273     }
274 
275     /**
276      * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
277      * bytes. Returns how many bytes were actually extracted.
278      * <p>
279      * Package protected for access from I/O streams.
280      *
281      * @param b
282      *            byte[] array to extract the buffered data into.
283      * @param bPos
284      *            position in byte[] array to start extraction at.
285      * @param bAvail
286      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
287      * @param context
288      *            the context to be used
289      * @return The number of bytes successfully extracted into the provided byte[] array.
290      */
291     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
292         if (context.buffer != null) {
293             final int len = Math.min(available(context), bAvail);
294             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
295             context.readPos += len;
296             if (context.readPos >= context.pos) {
297                 context.buffer = null; // so hasData() will return false, and this method can return -1
298             }
299             return len;
300         }
301         return context.eof ? EOF : 0;
302     }
303 
304     /**
305      * Checks if a byte value is whitespace or not.
306      * Whitespace is taken to mean: space, tab, CR, LF
307      * @param byteToCheck
308      *            the byte to check
309      * @return true if byte is whitespace, false otherwise
310      */
311     protected static boolean isWhiteSpace(final byte byteToCheck) {
312         switch (byteToCheck) {
313             case ' ' :
314             case '\n' :
315             case '\r' :
316             case '\t' :
317                 return true;
318             default :
319                 return false;
320         }
321     }
322 
323     /**
324      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
325      * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
326      *
327      * @param obj
328      *            Object to encode
329      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
330      * @throws EncoderException
331      *             if the parameter supplied is not of type byte[]
332      */
333     @Override
334     public Object encode(final Object obj) throws EncoderException {
335         if (!(obj instanceof byte[])) {
336             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
337         }
338         return encode((byte[]) obj);
339     }
340 
341     /**
342      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
343      * Uses UTF8 encoding.
344      *
345      * @param pArray
346      *            a byte array containing binary data
347      * @return A String containing only Base-N character data
348      */
349     public String encodeToString(final byte[] pArray) {
350         return StringUtils.newStringUtf8(encode(pArray));
351     }
352 
353     /**
354      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
355      * Uses UTF8 encoding.
356      *
357      * @param pArray a byte array containing binary data
358      * @return String containing only character data in the appropriate alphabet.
359      * @since 1.5
360      * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
361     */
362     public String encodeAsString(final byte[] pArray){
363         return StringUtils.newStringUtf8(encode(pArray));
364     }
365 
366     /**
367      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
368      * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
369      *
370      * @param obj
371      *            Object to decode
372      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
373      *         supplied.
374      * @throws DecoderException
375      *             if the parameter supplied is not of type byte[]
376      */
377     @Override
378     public Object decode(final Object obj) throws DecoderException {
379         if (obj instanceof byte[]) {
380             return decode((byte[]) obj);
381         } else if (obj instanceof String) {
382             return decode((String) obj);
383         } else {
384             throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
385         }
386     }
387 
388     /**
389      * Decodes a String containing characters in the Base-N alphabet.
390      *
391      * @param pArray
392      *            A String containing Base-N character data
393      * @return a byte array containing binary data
394      */
395     public byte[] decode(final String pArray) {
396         return decode(StringUtils.getBytesUtf8(pArray));
397     }
398 
399     /**
400      * Decodes a byte[] containing characters in the Base-N alphabet.
401      *
402      * @param pArray
403      *            A byte array containing Base-N character data
404      * @return a byte array containing binary data
405      */
406     @Override
407     public byte[] decode(final byte[] pArray) {
408         if (pArray == null || pArray.length == 0) {
409             return pArray;
410         }
411         final Context context = new Context();
412         decode(pArray, 0, pArray.length, context);
413         decode(pArray, 0, EOF, context); // Notify decoder of EOF.
414         final byte[] result = new byte[context.pos];
415         readResults(result, 0, result.length, context);
416         return result;
417     }
418 
419     /**
420      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
421      *
422      * @param pArray
423      *            a byte array containing binary data
424      * @return A byte array containing only the base N alphabetic character data
425      */
426     @Override
427     public byte[] encode(final byte[] pArray) {
428         if (pArray == null || pArray.length == 0) {
429             return pArray;
430         }
431         return encode(pArray, 0, pArray.length);
432     }
433 
434     /**
435      * Encodes a byte[] containing binary data, into a byte[] containing
436      * characters in the alphabet.
437      *
438      * @param pArray
439      *            a byte array containing binary data
440      * @param offset
441      *            initial offset of the subarray.
442      * @param length
443      *            length of the subarray.
444      * @return A byte array containing only the base N alphabetic character data
445      * @since 1.11
446      */
447     public byte[] encode(final byte[] pArray, final int offset, final int length) {
448         if (pArray == null || pArray.length == 0) {
449             return pArray;
450         }
451         final Context context = new Context();
452         encode(pArray, offset, length, context);
453         encode(pArray, offset, EOF, context); // Notify encoder of EOF.
454         final byte[] buf = new byte[context.pos - context.readPos];
455         readResults(buf, 0, buf.length, context);
456         return buf;
457     }
458 
    /**
     * Streaming encode step implemented by concrete codecs.
     * <p>
     * Within this class it is called with the input range to encode and then once more with
     * <code>length</code> set to {@link #EOF} to signal the end of the input; the output is afterwards
     * collected from the context through {@code readResults}.
     * </p>
     *
     * @param pArray byte[] array holding the binary data to encode
     * @param i offset in <code>pArray</code> at which the data starts
     * @param length number of bytes to encode, or {@link #EOF} to signal end of input
     * @param context the context to be used
     */
    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);
461 
    /**
     * Streaming decode step implemented by concrete codecs.
     * <p>
     * Within this class it is called with the whole input array and then once more with
     * <code>length</code> set to {@link #EOF} to signal the end of the input; the output is afterwards
     * collected from the context through {@code readResults}.
     * </p>
     *
     * @param pArray byte[] array holding the Base-N character data to decode
     * @param i offset in <code>pArray</code> at which the data starts
     * @param length number of bytes to decode, or {@link #EOF} to signal end of input
     * @param context the context to be used
     */
    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);
464 
    /**
     * Returns whether or not the <code>value</code> is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);
474 
475     /**
476      * Tests a given byte array to see if it contains only valid characters within the alphabet.
477      * The method optionally treats whitespace and pad as valid.
478      *
479      * @param arrayOctet byte array to test
480      * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
481      *
482      * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
483      *         <code>false</code>, otherwise
484      */
485     public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
486         for (final byte octet : arrayOctet) {
487             if (!isInAlphabet(octet) &&
488                     (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
489                 return false;
490             }
491         }
492         return true;
493     }
494 
495     /**
496      * Tests a given String to see if it contains only valid characters within the alphabet.
497      * The method treats whitespace and PAD as valid.
498      *
499      * @param basen String to test
500      * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
501      *         the String is empty; <code>false</code>, otherwise
502      * @see #isInAlphabet(byte[], boolean)
503      */
504     public boolean isInAlphabet(final String basen) {
505         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
506     }
507 
508     /**
509      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
510      *
511      * Intended for use in checking line-ending arrays
512      *
513      * @param arrayOctet
514      *            byte array to test
515      * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
516      */
517     protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
518         if (arrayOctet == null) {
519             return false;
520         }
521         for (final byte element : arrayOctet) {
522             if (pad == element || isInAlphabet(element)) {
523                 return true;
524             }
525         }
526         return false;
527     }
528 
529     /**
530      * Calculates the amount of space needed to encode the supplied array.
531      *
532      * @param pArray byte[] array which will later be encoded
533      *
534      * @return amount of space needed to encoded the supplied array.
535      * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
536      */
537     public long getEncodedLength(final byte[] pArray) {
538         // Calculate non-chunked size - rounded up to allow for padding
539         // cast to long is needed to avoid possibility of overflow
540         long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
541         if (lineLength > 0) { // We're using chunking
542             // Round up to nearest multiple
543             len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
544         }
545         return len;
546     }
547 }