View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import org.apache.commons.codec.BinaryDecoder;
21  import org.apache.commons.codec.BinaryEncoder;
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  
25  /**
26   * Abstract superclass for Base-N encoders and decoders.
27   *
28   * <p>
29   * This class is thread-safe.
30   * </p>
31   *
32   * @version $Id: BaseNCodec.html 889935 2013-12-11 05:05:13Z ggregory $
33   */
34  public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
35  
36      /**
37       * Holds thread context so classes can be thread-safe.
38       *
39       * This class is not itself thread-safe; each thread must allocate its own copy.
40       *
41       * @since 1.7
42       */
43      static class Context {
44  
45          /**
46           * Place holder for the bytes we're dealing with for our based logic.
47           * Bitwise operations store and extract the encoding or decoding from this variable.
48           */
49          int ibitWorkArea;
50  
51          /**
52           * Place holder for the bytes we're dealing with for our based logic.
53           * Bitwise operations store and extract the encoding or decoding from this variable.
54           */
55          long lbitWorkArea;
56  
57          /**
58           * Buffer for streaming.
59           */
60          byte[] buffer;
61  
62          /**
63           * Position where next character should be written in the buffer.
64           */
65          int pos;
66  
67          /**
68           * Position where next character should be read from the buffer.
69           */
70          int readPos;
71  
72          /**
73           * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
74           * and must be thrown away.
75           */
76          boolean eof;
77  
78          /**
79           * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
80           * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
81           */
82          int currentLinePos;
83  
84          /**
85           * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
86           * variable helps track that.
87           */
88          int modulus;
89  
90          Context() {
91          }
92  
93          /**
94           * Returns a String useful for debugging (especially within a debugger.)
95           *
96           * @return a String useful for debugging.
97           */
98          @SuppressWarnings("boxing") // OK to ignore boxing here
99          @Override
100         public String toString() {
101             return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
102                     "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), buffer, currentLinePos, eof,
103                     ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
104         }
105     }
106 
107     /**
108      * EOF
109      *
110      * @since 1.7
111      */
112     static final int EOF = -1;
113 
114     /**
115      *  MIME chunk size per RFC 2045 section 6.8.
116      *
117      * <p>
118      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
119      * equal signs.
120      * </p>
121      *
122      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
123      */
124     public static final int MIME_CHUNK_SIZE = 76;
125 
126     /**
127      * PEM chunk size per RFC 1421 section 4.3.2.4.
128      *
129      * <p>
130      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
131      * equal signs.
132      * </p>
133      *
134      * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
135      */
136     public static final int PEM_CHUNK_SIZE = 64;
137 
138     private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
139 
140     /**
141      * Defines the default buffer size - currently {@value}
142      * - must be large enough for at least one encoded block+separator
143      */
144     private static final int DEFAULT_BUFFER_SIZE = 8192;
145 
146     /** Mask used to extract 8 bits, used in decoding bytes */
147     protected static final int MASK_8BITS = 0xff;
148 
149     /**
150      * Byte used to pad output.
151      */
152     protected static final byte PAD_DEFAULT = '='; // Allow static access to default
153 
154     protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
155 
156     /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */
157     private final int unencodedBlockSize;
158 
159     /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */
160     private final int encodedBlockSize;
161 
162     /**
163      * Chunksize for encoding. Not used when decoding.
164      * A value of zero or less implies no chunking of the encoded data.
165      * Rounded down to nearest multiple of encodedBlockSize.
166      */
167     protected final int lineLength;
168 
169     /**
170      * Size of chunk separator. Not used unless {@link #lineLength} > 0.
171      */
172     private final int chunkSeparatorLength;
173 
174     /**
175      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
176      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
177      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
178      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
179      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
180      * @param chunkSeparatorLength the chunk separator length, if relevant
181      */
182     protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
183         this.unencodedBlockSize = unencodedBlockSize;
184         this.encodedBlockSize = encodedBlockSize;
185         final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
186         this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
187         this.chunkSeparatorLength = chunkSeparatorLength;
188     }
189 
190     /**
191      * Returns true if this object has buffered data for reading.
192      *
193      * @param context the context to be used
194      * @return true if there is data still available for reading.
195      */
196     boolean hasData(Context context) {  // package protected for access from I/O streams
197         return context.buffer != null;
198     }
199 
200     /**
201      * Returns the amount of buffered data available for reading.
202      *
203      * @param context the context to be used
204      * @return The amount of buffered data available for reading.
205      */
206     int available(Context context) {  // package protected for access from I/O streams
207         return context.buffer != null ? context.pos - context.readPos : 0;
208     }
209 
210     /**
211      * Get the default buffer size. Can be overridden.
212      *
213      * @return {@link #DEFAULT_BUFFER_SIZE}
214      */
215     protected int getDefaultBufferSize() {
216         return DEFAULT_BUFFER_SIZE;
217     }
218 
219     /**
220      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
221      * @param context the context to be used
222      */
223     private byte[] resizeBuffer(Context context) {
224         if (context.buffer == null) {
225             context.buffer = new byte[getDefaultBufferSize()];
226             context.pos = 0;
227             context.readPos = 0;
228         } else {
229             byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
230             System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
231             context.buffer = b;
232         }
233         return context.buffer;
234     }
235 
236     /**
237      * Ensure that the buffer has room for <code>size</code> bytes
238      *
239      * @param size minimum spare space required
240      * @param context the context to be used
241      */
242     protected byte[] ensureBufferSize(int size, Context context){
243         if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
244             return resizeBuffer(context);
245         }
246         return context.buffer;
247     }
248 
249     /**
250      * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
251      * bytes. Returns how many bytes were actually extracted.
252      * <p>
253      * Package protected for access from I/O streams.
254      *
255      * @param b
256      *            byte[] array to extract the buffered data into.
257      * @param bPos
258      *            position in byte[] array to start extraction at.
259      * @param bAvail
260      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
261      * @param context
262      *            the context to be used
263      * @return The number of bytes successfully extracted into the provided byte[] array.
264      */
265     int readResults(byte[] b, int bPos, int bAvail, Context context) {
266         if (context.buffer != null) {
267             int len = Math.min(available(context), bAvail);
268             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
269             context.readPos += len;
270             if (context.readPos >= context.pos) {
271                 context.buffer = null; // so hasData() will return false, and this method can return -1
272             }
273             return len;
274         }
275         return context.eof ? EOF : 0;
276     }
277 
278     /**
279      * Checks if a byte value is whitespace or not.
280      * Whitespace is taken to mean: space, tab, CR, LF
281      * @param byteToCheck
282      *            the byte to check
283      * @return true if byte is whitespace, false otherwise
284      */
285     protected static boolean isWhiteSpace(byte byteToCheck) {
286         switch (byteToCheck) {
287             case ' ' :
288             case '\n' :
289             case '\r' :
290             case '\t' :
291                 return true;
292             default :
293                 return false;
294         }
295     }
296 
297     /**
298      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
299      * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
300      *
301      * @param obj
302      *            Object to encode
303      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
304      * @throws EncoderException
305      *             if the parameter supplied is not of type byte[]
306      */
307     @Override
308     public Object encode(Object obj) throws EncoderException {
309         if (!(obj instanceof byte[])) {
310             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
311         }
312         return encode((byte[]) obj);
313     }
314 
315     /**
316      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
317      * Uses UTF8 encoding.
318      *
319      * @param pArray
320      *            a byte array containing binary data
321      * @return A String containing only Base-N character data
322      */
323     public String encodeToString(byte[] pArray) {
324         return StringUtils.newStringUtf8(encode(pArray));
325     }
326 
327     /**
328      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
329      * Uses UTF8 encoding.
330      *
331      * @param pArray a byte array containing binary data
332      * @return String containing only character data in the appropriate alphabet.
333     */
334     public String encodeAsString(byte[] pArray){
335         return StringUtils.newStringUtf8(encode(pArray));
336     }
337 
338     /**
339      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
340      * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
341      *
342      * @param obj
343      *            Object to decode
344      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
345      *         supplied.
346      * @throws DecoderException
347      *             if the parameter supplied is not of type byte[]
348      */
349     @Override
350     public Object decode(Object obj) throws DecoderException {
351         if (obj instanceof byte[]) {
352             return decode((byte[]) obj);
353         } else if (obj instanceof String) {
354             return decode((String) obj);
355         } else {
356             throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
357         }
358     }
359 
360     /**
361      * Decodes a String containing characters in the Base-N alphabet.
362      *
363      * @param pArray
364      *            A String containing Base-N character data
365      * @return a byte array containing binary data
366      */
367     public byte[] decode(String pArray) {
368         return decode(StringUtils.getBytesUtf8(pArray));
369     }
370 
371     /**
372      * Decodes a byte[] containing characters in the Base-N alphabet.
373      *
374      * @param pArray
375      *            A byte array containing Base-N character data
376      * @return a byte array containing binary data
377      */
378     @Override
379     public byte[] decode(byte[] pArray) {
380         if (pArray == null || pArray.length == 0) {
381             return pArray;
382         }
383         Context context = new Context();
384         decode(pArray, 0, pArray.length, context);
385         decode(pArray, 0, EOF, context); // Notify decoder of EOF.
386         byte[] result = new byte[context.pos];
387         readResults(result, 0, result.length, context);
388         return result;
389     }
390 
391     /**
392      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
393      *
394      * @param pArray
395      *            a byte array containing binary data
396      * @return A byte array containing only the basen alphabetic character data
397      */
398     @Override
399     public byte[] encode(byte[] pArray) {
400         if (pArray == null || pArray.length == 0) {
401             return pArray;
402         }
403         Context context = new Context();
404         encode(pArray, 0, pArray.length, context);
405         encode(pArray, 0, EOF, context); // Notify encoder of EOF.
406         byte[] buf = new byte[context.pos - context.readPos];
407         readResults(buf, 0, buf.length, context);
408         return buf;
409     }
410 
411     // package protected for access from I/O streams
412     abstract void encode(byte[] pArray, int i, int length, Context context);
413 
414     // package protected for access from I/O streams
415     abstract void decode(byte[] pArray, int i, int length, Context context);
416 
417     /**
418      * Returns whether or not the <code>octet</code> is in the current alphabet.
419      * Does not allow whitespace or pad.
420      *
421      * @param value The value to test
422      *
423      * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
424      */
425     protected abstract boolean isInAlphabet(byte value);
426 
427     /**
428      * Tests a given byte array to see if it contains only valid characters within the alphabet.
429      * The method optionally treats whitespace and pad as valid.
430      *
431      * @param arrayOctet byte array to test
432      * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
433      *
434      * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
435      *         {@code false}, otherwise
436      */
437     public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
438         for (int i = 0; i < arrayOctet.length; i++) {
439             if (!isInAlphabet(arrayOctet[i]) &&
440                     (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
441                 return false;
442             }
443         }
444         return true;
445     }
446 
447     /**
448      * Tests a given String to see if it contains only valid characters within the alphabet.
449      * The method treats whitespace and PAD as valid.
450      *
451      * @param basen String to test
452      * @return {@code true} if all characters in the String are valid characters in the alphabet or if
453      *         the String is empty; {@code false}, otherwise
454      * @see #isInAlphabet(byte[], boolean)
455      */
456     public boolean isInAlphabet(String basen) {
457         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
458     }
459 
460     /**
461      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
462      *
463      * Intended for use in checking line-ending arrays
464      *
465      * @param arrayOctet
466      *            byte array to test
467      * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
468      */
469     protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
470         if (arrayOctet == null) {
471             return false;
472         }
473         for (byte element : arrayOctet) {
474             if (PAD == element || isInAlphabet(element)) {
475                 return true;
476             }
477         }
478         return false;
479     }
480 
481     /**
482      * Calculates the amount of space needed to encode the supplied array.
483      *
484      * @param pArray byte[] array which will later be encoded
485      *
486      * @return amount of space needed to encoded the supplied array.
487      * Returns a long since a max-len array will require > Integer.MAX_VALUE
488      */
489     public long getEncodedLength(byte[] pArray) {
490         // Calculate non-chunked size - rounded up to allow for padding
491         // cast to long is needed to avoid possibility of overflow
492         long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
493         if (lineLength > 0) { // We're using chunking
494             // Round up to nearest multiple
495             len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
496         }
497         return len;
498     }
499 }