001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import java.util.Arrays;
021    
022    import org.apache.commons.codec.BinaryDecoder;
023    import org.apache.commons.codec.BinaryEncoder;
024    import org.apache.commons.codec.DecoderException;
025    import org.apache.commons.codec.EncoderException;
026    
027    /**
028     * Abstract superclass for Base-N encoders and decoders.
029     *
030     * <p>
031     * This class is thread-safe.
032     * </p>
033     *
034     * @version $Id: BaseNCodec.html 889935 2013-12-11 05:05:13Z ggregory $
035     */
036    public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
037    
038        /**
039         * Holds thread context so classes can be thread-safe.
040         *
041         * This class is not itself thread-safe; each thread must allocate its own copy.
042         *
043         * @since 1.7
044         */
045        static class Context {
046    
047            /**
048             * Place holder for the bytes we're dealing with for our based logic.
049             * Bitwise operations store and extract the encoding or decoding from this variable.
050             */
051            int ibitWorkArea;
052    
053            /**
054             * Place holder for the bytes we're dealing with for our based logic.
055             * Bitwise operations store and extract the encoding or decoding from this variable.
056             */
057            long lbitWorkArea;
058    
059            /**
060             * Buffer for streaming.
061             */
062            byte[] buffer;
063    
064            /**
065             * Position where next character should be written in the buffer.
066             */
067            int pos;
068    
069            /**
070             * Position where next character should be read from the buffer.
071             */
072            int readPos;
073    
074            /**
075             * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
076             * and must be thrown away.
077             */
078            boolean eof;
079    
080            /**
081             * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
082             * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
083             */
084            int currentLinePos;
085    
086            /**
087             * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
088             * variable helps track that.
089             */
090            int modulus;
091    
092            Context() {
093            }
094    
095            /**
096             * Returns a String useful for debugging (especially within a debugger.)
097             *
098             * @return a String useful for debugging.
099             */
100            @SuppressWarnings("boxing") // OK to ignore boxing here
101            @Override
102            public String toString() {
103                return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
104                        "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
105                        currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
106            }
107        }
108    
109        /**
110         * EOF
111         *
112         * @since 1.7
113         */
114        static final int EOF = -1;
115    
116        /**
117         *  MIME chunk size per RFC 2045 section 6.8.
118         *
119         * <p>
120         * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
121         * equal signs.
122         * </p>
123         *
124         * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
125         */
126        public static final int MIME_CHUNK_SIZE = 76;
127    
128        /**
129         * PEM chunk size per RFC 1421 section 4.3.2.4.
130         *
131         * <p>
132         * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
133         * equal signs.
134         * </p>
135         *
136         * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
137         */
138        public static final int PEM_CHUNK_SIZE = 64;
139    
140        private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
141    
142        /**
143         * Defines the default buffer size - currently {@value}
144         * - must be large enough for at least one encoded block+separator
145         */
146        private static final int DEFAULT_BUFFER_SIZE = 8192;
147    
148        /** Mask used to extract 8 bits, used in decoding bytes */
149        protected static final int MASK_8BITS = 0xff;
150    
151        /**
152         * Byte used to pad output.
153         */
154        protected static final byte PAD_DEFAULT = '='; // Allow static access to default
155    
156        protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
157    
158        /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */
159        private final int unencodedBlockSize;
160    
161        /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */
162        private final int encodedBlockSize;
163    
164        /**
165         * Chunksize for encoding. Not used when decoding.
166         * A value of zero or less implies no chunking of the encoded data.
167         * Rounded down to nearest multiple of encodedBlockSize.
168         */
169        protected final int lineLength;
170    
171        /**
172         * Size of chunk separator. Not used unless {@link #lineLength} > 0.
173         */
174        private final int chunkSeparatorLength;
175    
176        /**
177         * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
178         * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
179         * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
180         * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
181         * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
182         * @param chunkSeparatorLength the chunk separator length, if relevant
183         */
184        protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
185                             final int lineLength, final int chunkSeparatorLength) {
186            this.unencodedBlockSize = unencodedBlockSize;
187            this.encodedBlockSize = encodedBlockSize;
188            final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
189            this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
190            this.chunkSeparatorLength = chunkSeparatorLength;
191        }
192    
193        /**
194         * Returns true if this object has buffered data for reading.
195         *
196         * @param context the context to be used
197         * @return true if there is data still available for reading.
198         */
199        boolean hasData(final Context context) {  // package protected for access from I/O streams
200            return context.buffer != null;
201        }
202    
203        /**
204         * Returns the amount of buffered data available for reading.
205         *
206         * @param context the context to be used
207         * @return The amount of buffered data available for reading.
208         */
209        int available(final Context context) {  // package protected for access from I/O streams
210            return context.buffer != null ? context.pos - context.readPos : 0;
211        }
212    
213        /**
214         * Get the default buffer size. Can be overridden.
215         *
216         * @return {@link #DEFAULT_BUFFER_SIZE}
217         */
218        protected int getDefaultBufferSize() {
219            return DEFAULT_BUFFER_SIZE;
220        }
221    
222        /**
223         * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
224         * @param context the context to be used
225         */
226        private byte[] resizeBuffer(final Context context) {
227            if (context.buffer == null) {
228                context.buffer = new byte[getDefaultBufferSize()];
229                context.pos = 0;
230                context.readPos = 0;
231            } else {
232                final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
233                System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
234                context.buffer = b;
235            }
236            return context.buffer;
237        }
238    
239        /**
240         * Ensure that the buffer has room for <code>size</code> bytes
241         *
242         * @param size minimum spare space required
243         * @param context the context to be used
244         */
245        protected byte[] ensureBufferSize(final int size, final Context context){
246            if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
247                return resizeBuffer(context);
248            }
249            return context.buffer;
250        }
251    
252        /**
253         * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
254         * bytes. Returns how many bytes were actually extracted.
255         * <p>
256         * Package protected for access from I/O streams.
257         *
258         * @param b
259         *            byte[] array to extract the buffered data into.
260         * @param bPos
261         *            position in byte[] array to start extraction at.
262         * @param bAvail
263         *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
264         * @param context
265         *            the context to be used
266         * @return The number of bytes successfully extracted into the provided byte[] array.
267         */
268        int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
269            if (context.buffer != null) {
270                final int len = Math.min(available(context), bAvail);
271                System.arraycopy(context.buffer, context.readPos, b, bPos, len);
272                context.readPos += len;
273                if (context.readPos >= context.pos) {
274                    context.buffer = null; // so hasData() will return false, and this method can return -1
275                }
276                return len;
277            }
278            return context.eof ? EOF : 0;
279        }
280    
281        /**
282         * Checks if a byte value is whitespace or not.
283         * Whitespace is taken to mean: space, tab, CR, LF
284         * @param byteToCheck
285         *            the byte to check
286         * @return true if byte is whitespace, false otherwise
287         */
288        protected static boolean isWhiteSpace(final byte byteToCheck) {
289            switch (byteToCheck) {
290                case ' ' :
291                case '\n' :
292                case '\r' :
293                case '\t' :
294                    return true;
295                default :
296                    return false;
297            }
298        }
299    
300        /**
301         * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
302         * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
303         *
304         * @param obj
305         *            Object to encode
306         * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
307         * @throws EncoderException
308         *             if the parameter supplied is not of type byte[]
309         */
310        @Override
311        public Object encode(final Object obj) throws EncoderException {
312            if (!(obj instanceof byte[])) {
313                throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
314            }
315            return encode((byte[]) obj);
316        }
317    
318        /**
319         * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
320         * Uses UTF8 encoding.
321         *
322         * @param pArray
323         *            a byte array containing binary data
324         * @return A String containing only Base-N character data
325         */
326        public String encodeToString(final byte[] pArray) {
327            return StringUtils.newStringUtf8(encode(pArray));
328        }
329    
330        /**
331         * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
332         * Uses UTF8 encoding.
333         *
334         * @param pArray a byte array containing binary data
335         * @return String containing only character data in the appropriate alphabet.
336        */
337        public String encodeAsString(final byte[] pArray){
338            return StringUtils.newStringUtf8(encode(pArray));
339        }
340    
341        /**
342         * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
343         * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
344         *
345         * @param obj
346         *            Object to decode
347         * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
348         *         supplied.
349         * @throws DecoderException
350         *             if the parameter supplied is not of type byte[]
351         */
352        @Override
353        public Object decode(final Object obj) throws DecoderException {
354            if (obj instanceof byte[]) {
355                return decode((byte[]) obj);
356            } else if (obj instanceof String) {
357                return decode((String) obj);
358            } else {
359                throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
360            }
361        }
362    
363        /**
364         * Decodes a String containing characters in the Base-N alphabet.
365         *
366         * @param pArray
367         *            A String containing Base-N character data
368         * @return a byte array containing binary data
369         */
370        public byte[] decode(final String pArray) {
371            return decode(StringUtils.getBytesUtf8(pArray));
372        }
373    
374        /**
375         * Decodes a byte[] containing characters in the Base-N alphabet.
376         *
377         * @param pArray
378         *            A byte array containing Base-N character data
379         * @return a byte array containing binary data
380         */
381        @Override
382        public byte[] decode(final byte[] pArray) {
383            if (pArray == null || pArray.length == 0) {
384                return pArray;
385            }
386            final Context context = new Context();
387            decode(pArray, 0, pArray.length, context);
388            decode(pArray, 0, EOF, context); // Notify decoder of EOF.
389            final byte[] result = new byte[context.pos];
390            readResults(result, 0, result.length, context);
391            return result;
392        }
393    
394        /**
395         * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
396         *
397         * @param pArray
398         *            a byte array containing binary data
399         * @return A byte array containing only the basen alphabetic character data
400         */
401        @Override
402        public byte[] encode(final byte[] pArray) {
403            if (pArray == null || pArray.length == 0) {
404                return pArray;
405            }
406            final Context context = new Context();
407            encode(pArray, 0, pArray.length, context);
408            encode(pArray, 0, EOF, context); // Notify encoder of EOF.
409            final byte[] buf = new byte[context.pos - context.readPos];
410            readResults(buf, 0, buf.length, context);
411            return buf;
412        }
413    
414        // package protected for access from I/O streams
415        abstract void encode(byte[] pArray, int i, int length, Context context);
416    
417        // package protected for access from I/O streams
418        abstract void decode(byte[] pArray, int i, int length, Context context);
419    
420        /**
421         * Returns whether or not the <code>octet</code> is in the current alphabet.
422         * Does not allow whitespace or pad.
423         *
424         * @param value The value to test
425         *
426         * @return {@code true} if the value is defined in the current alphabet, {@code false} otherwise.
427         */
428        protected abstract boolean isInAlphabet(byte value);
429    
430        /**
431         * Tests a given byte array to see if it contains only valid characters within the alphabet.
432         * The method optionally treats whitespace and pad as valid.
433         *
434         * @param arrayOctet byte array to test
435         * @param allowWSPad if {@code true}, then whitespace and PAD are also allowed
436         *
437         * @return {@code true} if all bytes are valid characters in the alphabet or if the byte array is empty;
438         *         {@code false}, otherwise
439         */
440        public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
441            for (int i = 0; i < arrayOctet.length; i++) {
442                if (!isInAlphabet(arrayOctet[i]) &&
443                        (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
444                    return false;
445                }
446            }
447            return true;
448        }
449    
450        /**
451         * Tests a given String to see if it contains only valid characters within the alphabet.
452         * The method treats whitespace and PAD as valid.
453         *
454         * @param basen String to test
455         * @return {@code true} if all characters in the String are valid characters in the alphabet or if
456         *         the String is empty; {@code false}, otherwise
457         * @see #isInAlphabet(byte[], boolean)
458         */
459        public boolean isInAlphabet(final String basen) {
460            return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
461        }
462    
463        /**
464         * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
465         *
466         * Intended for use in checking line-ending arrays
467         *
468         * @param arrayOctet
469         *            byte array to test
470         * @return {@code true} if any byte is a valid character in the alphabet or PAD; {@code false} otherwise
471         */
472        protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
473            if (arrayOctet == null) {
474                return false;
475            }
476            for (final byte element : arrayOctet) {
477                if (PAD == element || isInAlphabet(element)) {
478                    return true;
479                }
480            }
481            return false;
482        }
483    
484        /**
485         * Calculates the amount of space needed to encode the supplied array.
486         *
487         * @param pArray byte[] array which will later be encoded
488         *
489         * @return amount of space needed to encoded the supplied array.
490         * Returns a long since a max-len array will require > Integer.MAX_VALUE
491         */
492        public long getEncodedLength(final byte[] pArray) {
493            // Calculate non-chunked size - rounded up to allow for padding
494            // cast to long is needed to avoid possibility of overflow
495            long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
496            if (lineLength > 0) { // We're using chunking
497                // Round up to nearest multiple
498                len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
499            }
500            return len;
501        }
502    }