001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.util.Arrays;
021
022import org.apache.commons.codec.BinaryDecoder;
023import org.apache.commons.codec.BinaryEncoder;
024import org.apache.commons.codec.DecoderException;
025import org.apache.commons.codec.EncoderException;
026
027/**
028 * Abstract superclass for Base-N encoders and decoders.
029 *
030 * <p>
031 * This class is thread-safe.
032 * </p>
033 *
034 * @version $Id: BaseNCodec.html 928559 2014-11-10 02:53:54Z ggregory $
035 */
036public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
037
038    /**
039     * Holds thread context so classes can be thread-safe.
040     *
041     * This class is not itself thread-safe; each thread must allocate its own copy.
042     *
043     * @since 1.7
044     */
045    static class Context {
046
047        /**
048         * Place holder for the bytes we're dealing with for our based logic.
049         * Bitwise operations store and extract the encoding or decoding from this variable.
050         */
051        int ibitWorkArea;
052
053        /**
054         * Place holder for the bytes we're dealing with for our based logic.
055         * Bitwise operations store and extract the encoding or decoding from this variable.
056         */
057        long lbitWorkArea;
058
059        /**
060         * Buffer for streaming.
061         */
062        byte[] buffer;
063
064        /**
065         * Position where next character should be written in the buffer.
066         */
067        int pos;
068
069        /**
070         * Position where next character should be read from the buffer.
071         */
072        int readPos;
073
074        /**
075         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
076         * and must be thrown away.
077         */
078        boolean eof;
079
080        /**
081         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
082         * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
083         */
084        int currentLinePos;
085
086        /**
087         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
088         * variable helps track that.
089         */
090        int modulus;
091
092        Context() {
093        }
094
095        /**
096         * Returns a String useful for debugging (especially within a debugger.)
097         *
098         * @return a String useful for debugging.
099         */
100        @SuppressWarnings("boxing") // OK to ignore boxing here
101        @Override
102        public String toString() {
103            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
104                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
105                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
106        }
107    }
108
    /**
     * End-of-file marker. Within this class it is returned by {@code readResults} once the context has reached
     * EOF and holds no more buffered data, and it is passed as the length argument of the abstract
     * {@code encode}/{@code decode} methods to notify them that the input is complete.
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     *  MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /**
     * Factor by which an already allocated buffer is multiplied when it has to grow - currently {@value}.
     */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;
150
    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * Always holds {@link #PAD_DEFAULT}, regardless of the pad byte passed to the constructor.
     *
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /**
     * Pad byte used by this instance, as supplied to the constructor.
     */
    protected final byte pad; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;
181
    /**
     * Creates a codec that pads with {@link #PAD_DEFAULT}; all other arguments are forwarded unchanged to the
     * five-argument constructor.
     * <p>
     * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
     * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
     * </p>
     *
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }
194
195    /**
196     * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
197     * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
198     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
199     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
200     * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
201     * @param chunkSeparatorLength the chunk separator length, if relevant
202     * @param pad byte used as padding byte.
203     */
204    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
205                         final int lineLength, final int chunkSeparatorLength, final byte pad) {
206        this.unencodedBlockSize = unencodedBlockSize;
207        this.encodedBlockSize = encodedBlockSize;
208        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
209        this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
210        this.chunkSeparatorLength = chunkSeparatorLength;
211
212        this.pad = pad;
213    }
214
215    /**
216     * Returns true if this object has buffered data for reading.
217     *
218     * @param context the context to be used
219     * @return true if there is data still available for reading.
220     */
221    boolean hasData(final Context context) {  // package protected for access from I/O streams
222        return context.buffer != null;
223    }
224
225    /**
226     * Returns the amount of buffered data available for reading.
227     *
228     * @param context the context to be used
229     * @return The amount of buffered data available for reading.
230     */
231    int available(final Context context) {  // package protected for access from I/O streams
232        return context.buffer != null ? context.pos - context.readPos : 0;
233    }
234
    /**
     * Get the default buffer size. Can be overridden.
     * <p>
     * The returned value is used as the length of the buffer that is allocated when a context has none yet.
     * </p>
     *
     * @return {@link #DEFAULT_BUFFER_SIZE}
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }
243
244    /**
245     * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
246     * @param context the context to be used
247     */
248    private byte[] resizeBuffer(final Context context) {
249        if (context.buffer == null) {
250            context.buffer = new byte[getDefaultBufferSize()];
251            context.pos = 0;
252            context.readPos = 0;
253        } else {
254            final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
255            System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
256            context.buffer = b;
257        }
258        return context.buffer;
259    }
260
261    /**
262     * Ensure that the buffer has room for <code>size</code> bytes
263     *
264     * @param size minimum spare space required
265     * @param context the context to be used
266     * @return the buffer
267     */
268    protected byte[] ensureBufferSize(final int size, final Context context){
269        if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
270            return resizeBuffer(context);
271        }
272        return context.buffer;
273    }
274
275    /**
276     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
277     * bytes. Returns how many bytes were actually extracted.
278     * <p>
279     * Package protected for access from I/O streams.
280     *
281     * @param b
282     *            byte[] array to extract the buffered data into.
283     * @param bPos
284     *            position in byte[] array to start extraction at.
285     * @param bAvail
286     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
287     * @param context
288     *            the context to be used
289     * @return The number of bytes successfully extracted into the provided byte[] array.
290     */
291    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
292        if (context.buffer != null) {
293            final int len = Math.min(available(context), bAvail);
294            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
295            context.readPos += len;
296            if (context.readPos >= context.pos) {
297                context.buffer = null; // so hasData() will return false, and this method can return -1
298            }
299            return len;
300        }
301        return context.eof ? EOF : 0;
302    }
303
304    /**
305     * Checks if a byte value is whitespace or not.
306     * Whitespace is taken to mean: space, tab, CR, LF
307     * @param byteToCheck
308     *            the byte to check
309     * @return true if byte is whitespace, false otherwise
310     */
311    protected static boolean isWhiteSpace(final byte byteToCheck) {
312        switch (byteToCheck) {
313            case ' ' :
314            case '\n' :
315            case '\r' :
316            case '\t' :
317                return true;
318            default :
319                return false;
320        }
321    }
322
323    /**
324     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
325     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
326     *
327     * @param obj
328     *            Object to encode
329     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
330     * @throws EncoderException
331     *             if the parameter supplied is not of type byte[]
332     */
333    @Override
334    public Object encode(final Object obj) throws EncoderException {
335        if (!(obj instanceof byte[])) {
336            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
337        }
338        return encode((byte[]) obj);
339    }
340
341    /**
342     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
343     * Uses UTF8 encoding.
344     *
345     * @param pArray
346     *            a byte array containing binary data
347     * @return A String containing only Base-N character data
348     */
349    public String encodeToString(final byte[] pArray) {
350        return StringUtils.newStringUtf8(encode(pArray));
351    }
352
353    /**
354     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
355     * Uses UTF8 encoding.
356     *
357     * @param pArray a byte array containing binary data
358     * @return String containing only character data in the appropriate alphabet.
359    */
360    public String encodeAsString(final byte[] pArray){
361        return StringUtils.newStringUtf8(encode(pArray));
362    }
363
364    /**
365     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
366     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
367     *
368     * @param obj
369     *            Object to decode
370     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
371     *         supplied.
372     * @throws DecoderException
373     *             if the parameter supplied is not of type byte[]
374     */
375    @Override
376    public Object decode(final Object obj) throws DecoderException {
377        if (obj instanceof byte[]) {
378            return decode((byte[]) obj);
379        } else if (obj instanceof String) {
380            return decode((String) obj);
381        } else {
382            throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
383        }
384    }
385
386    /**
387     * Decodes a String containing characters in the Base-N alphabet.
388     *
389     * @param pArray
390     *            A String containing Base-N character data
391     * @return a byte array containing binary data
392     */
393    public byte[] decode(final String pArray) {
394        return decode(StringUtils.getBytesUtf8(pArray));
395    }
396
397    /**
398     * Decodes a byte[] containing characters in the Base-N alphabet.
399     *
400     * @param pArray
401     *            A byte array containing Base-N character data
402     * @return a byte array containing binary data
403     */
404    @Override
405    public byte[] decode(final byte[] pArray) {
406        if (pArray == null || pArray.length == 0) {
407            return pArray;
408        }
409        final Context context = new Context();
410        decode(pArray, 0, pArray.length, context);
411        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
412        final byte[] result = new byte[context.pos];
413        readResults(result, 0, result.length, context);
414        return result;
415    }
416
417    /**
418     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
419     *
420     * @param pArray
421     *            a byte array containing binary data
422     * @return A byte array containing only the basen alphabetic character data
423     */
424    @Override
425    public byte[] encode(final byte[] pArray) {
426        if (pArray == null || pArray.length == 0) {
427            return pArray;
428        }
429        final Context context = new Context();
430        encode(pArray, 0, pArray.length, context);
431        encode(pArray, 0, EOF, context); // Notify encoder of EOF.
432        final byte[] buf = new byte[context.pos - context.readPos];
433        readResults(buf, 0, buf.length, context);
434        return buf;
435    }
436
    /**
     * Encodes part of a byte array, keeping all intermediate state and output in the supplied context.
     * <p>
     * Within this class it is called once with the complete input and then once more with {@link #EOF} as the
     * length to notify the encoder that the input is finished.
     * </p>
     * <p>
     * Package protected for access from I/O streams.
     * </p>
     *
     * @param pArray the data to encode
     * @param i presumably the index of the first byte to process (this class always passes 0) - confirm
     *          against the implementations
     * @param length the number of bytes to process, or {@link #EOF} to signal the end of the input
     * @param context the context to be used
     */
    abstract void encode(byte[] pArray, int i, int length, Context context);
439
    /**
     * Decodes part of a byte array, keeping all intermediate state and output in the supplied context.
     * <p>
     * Within this class it is called once with the complete input and then once more with {@link #EOF} as the
     * length to notify the decoder that the input is finished.
     * </p>
     * <p>
     * Package protected for access from I/O streams.
     * </p>
     *
     * @param pArray the data to decode
     * @param i presumably the index of the first byte to process (this class always passes 0) - confirm
     *          against the implementations
     * @param length the number of bytes to process, or {@link #EOF} to signal the end of the input
     * @param context the context to be used
     */
    abstract void decode(byte[] pArray, int i, int length, Context context);
442
    /**
     * Returns whether or not the <code>octet</code> is in the current alphabet.
     * Does not allow whitespace or pad.
     * <p>
     * Implemented by subclasses; the array and String variants of this method, as well as
     * {@link #containsAlphabetOrPad(byte[])}, rely on it for the per-byte check.
     * </p>
     *
     * @param value The value to test
     *
     * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);
452
453    /**
454     * Tests a given byte array to see if it contains only valid characters within the alphabet.
455     * The method optionally treats whitespace and pad as valid.
456     *
457     * @param arrayOctet byte array to test
458     * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
459     *
460     * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
461     *         <code>false</code>, otherwise
462     */
463    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
464        for (int i = 0; i < arrayOctet.length; i++) {
465            if (!isInAlphabet(arrayOctet[i]) &&
466                    (!allowWSPad || (arrayOctet[i] != pad) && !isWhiteSpace(arrayOctet[i]))) {
467                return false;
468            }
469        }
470        return true;
471    }
472
473    /**
474     * Tests a given String to see if it contains only valid characters within the alphabet.
475     * The method treats whitespace and PAD as valid.
476     *
477     * @param basen String to test
478     * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
479     *         the String is empty; <code>false</code>, otherwise
480     * @see #isInAlphabet(byte[], boolean)
481     */
482    public boolean isInAlphabet(final String basen) {
483        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
484    }
485
486    /**
487     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
488     *
489     * Intended for use in checking line-ending arrays
490     *
491     * @param arrayOctet
492     *            byte array to test
493     * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
494     */
495    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
496        if (arrayOctet == null) {
497            return false;
498        }
499        for (final byte element : arrayOctet) {
500            if (pad == element || isInAlphabet(element)) {
501                return true;
502            }
503        }
504        return false;
505    }
506
507    /**
508     * Calculates the amount of space needed to encode the supplied array.
509     *
510     * @param pArray byte[] array which will later be encoded
511     *
512     * @return amount of space needed to encoded the supplied array.
513     * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
514     */
515    public long getEncodedLength(final byte[] pArray) {
516        // Calculate non-chunked size - rounded up to allow for padding
517        // cast to long is needed to avoid possibility of overflow
518        long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
519        if (lineLength > 0) { // We're using chunking
520            // Round up to nearest multiple
521            len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
522        }
523        return len;
524    }
525}