BaseNCodec.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.util.Arrays;

  19. import org.apache.commons.codec.BinaryDecoder;
  20. import org.apache.commons.codec.BinaryEncoder;
  21. import org.apache.commons.codec.DecoderException;
  22. import org.apache.commons.codec.EncoderException;

  23. /**
  24.  * Abstract superclass for Base-N encoders and decoders.
  25.  *
  26.  * <p>
  27.  * This class is thread-safe.
  28.  * </p>
  29.  *
  30.  * @version $Id: BaseNCodec.java 1811344 2017-10-06 15:19:57Z ggregory $
  31.  */
  32. public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

  33.     /**
  34.      * Holds thread context so classes can be thread-safe.
  35.      *
  36.      * This class is not itself thread-safe; each thread must allocate its own copy.
  37.      *
  38.      * @since 1.7
  39.      */
  40.     static class Context {

  41.         /**
  42.          * Place holder for the bytes we're dealing with for our based logic.
  43.          * Bitwise operations store and extract the encoding or decoding from this variable.
  44.          */
  45.         int ibitWorkArea;

  46.         /**
  47.          * Place holder for the bytes we're dealing with for our based logic.
  48.          * Bitwise operations store and extract the encoding or decoding from this variable.
  49.          */
  50.         long lbitWorkArea;

  51.         /**
  52.          * Buffer for streaming.
  53.          */
  54.         byte[] buffer;

  55.         /**
  56.          * Position where next character should be written in the buffer.
  57.          */
  58.         int pos;

  59.         /**
  60.          * Position where next character should be read from the buffer.
  61.          */
  62.         int readPos;

  63.         /**
  64.          * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
  65.          * and must be thrown away.
  66.          */
  67.         boolean eof;

  68.         /**
  69.          * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
  70.          * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
  71.          */
  72.         int currentLinePos;

  73.         /**
  74.          * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
  75.          * variable helps track that.
  76.          */
  77.         int modulus;

  78.         Context() {
  79.         }

  80.         /**
  81.          * Returns a String useful for debugging (especially within a debugger.)
  82.          *
  83.          * @return a String useful for debugging.
  84.          */
  85.         @SuppressWarnings("boxing") // OK to ignore boxing here
  86.         @Override
  87.         public String toString() {
  88.             return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
  89.                     "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
  90.                     currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
  91.         }
  92.     }

    /**
     * End-of-input marker.
     * <p>
     * Within this class it is passed as the {@code length} argument of the streaming
     * {@code encode}/{@code decode} methods to signal that no more input follows, and it is returned by
     * {@code readResults} once the context is at EOF and holds no buffered data.
     * </p>
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     *  MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which an existing streaming buffer is multiplied when it has to grow. */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output when no other pad byte is passed to the constructor.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * Always equal to {@link #PAD_DEFAULT}, whatever pad byte the codec was constructed with.
     *
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Pad byte used by this codec instance; set once by the constructor. */
    protected final byte pad; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;

  153.     /**
  154.      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
  155.      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
  156.      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
  157.      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
  158.      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
  159.      * @param chunkSeparatorLength the chunk separator length, if relevant
  160.      */
  161.     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
  162.                          final int lineLength, final int chunkSeparatorLength) {
  163.         this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
  164.     }

  165.     /**
  166.      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
  167.      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
  168.      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
  169.      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
  170.      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
  171.      * @param chunkSeparatorLength the chunk separator length, if relevant
  172.      * @param pad byte used as padding byte.
  173.      */
  174.     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
  175.                          final int lineLength, final int chunkSeparatorLength, final byte pad) {
  176.         this.unencodedBlockSize = unencodedBlockSize;
  177.         this.encodedBlockSize = encodedBlockSize;
  178.         final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
  179.         this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
  180.         this.chunkSeparatorLength = chunkSeparatorLength;

  181.         this.pad = pad;
  182.     }

  183.     /**
  184.      * Returns true if this object has buffered data for reading.
  185.      *
  186.      * @param context the context to be used
  187.      * @return true if there is data still available for reading.
  188.      */
  189.     boolean hasData(final Context context) {  // package protected for access from I/O streams
  190.         return context.buffer != null;
  191.     }

  192.     /**
  193.      * Returns the amount of buffered data available for reading.
  194.      *
  195.      * @param context the context to be used
  196.      * @return The amount of buffered data available for reading.
  197.      */
  198.     int available(final Context context) {  // package protected for access from I/O streams
  199.         return context.buffer != null ? context.pos - context.readPos : 0;
  200.     }

  201.     /**
  202.      * Get the default buffer size. Can be overridden.
  203.      *
  204.      * @return {@link #DEFAULT_BUFFER_SIZE}
  205.      */
  206.     protected int getDefaultBufferSize() {
  207.         return DEFAULT_BUFFER_SIZE;
  208.     }

  209.     /**
  210.      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
  211.      * @param context the context to be used
  212.      */
  213.     private byte[] resizeBuffer(final Context context) {
  214.         if (context.buffer == null) {
  215.             context.buffer = new byte[getDefaultBufferSize()];
  216.             context.pos = 0;
  217.             context.readPos = 0;
  218.         } else {
  219.             final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
  220.             System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
  221.             context.buffer = b;
  222.         }
  223.         return context.buffer;
  224.     }

  225.     /**
  226.      * Ensure that the buffer has room for <code>size</code> bytes
  227.      *
  228.      * @param size minimum spare space required
  229.      * @param context the context to be used
  230.      * @return the buffer
  231.      */
  232.     protected byte[] ensureBufferSize(final int size, final Context context){
  233.         if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
  234.             return resizeBuffer(context);
  235.         }
  236.         return context.buffer;
  237.     }

  238.     /**
  239.      * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
  240.      * bytes. Returns how many bytes were actually extracted.
  241.      * <p>
  242.      * Package protected for access from I/O streams.
  243.      *
  244.      * @param b
  245.      *            byte[] array to extract the buffered data into.
  246.      * @param bPos
  247.      *            position in byte[] array to start extraction at.
  248.      * @param bAvail
  249.      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
  250.      * @param context
  251.      *            the context to be used
  252.      * @return The number of bytes successfully extracted into the provided byte[] array.
  253.      */
  254.     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
  255.         if (context.buffer != null) {
  256.             final int len = Math.min(available(context), bAvail);
  257.             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
  258.             context.readPos += len;
  259.             if (context.readPos >= context.pos) {
  260.                 context.buffer = null; // so hasData() will return false, and this method can return -1
  261.             }
  262.             return len;
  263.         }
  264.         return context.eof ? EOF : 0;
  265.     }

  266.     /**
  267.      * Checks if a byte value is whitespace or not.
  268.      * Whitespace is taken to mean: space, tab, CR, LF
  269.      * @param byteToCheck
  270.      *            the byte to check
  271.      * @return true if byte is whitespace, false otherwise
  272.      */
  273.     protected static boolean isWhiteSpace(final byte byteToCheck) {
  274.         switch (byteToCheck) {
  275.             case ' ' :
  276.             case '\n' :
  277.             case '\r' :
  278.             case '\t' :
  279.                 return true;
  280.             default :
  281.                 return false;
  282.         }
  283.     }

  284.     /**
  285.      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
  286.      * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
  287.      *
  288.      * @param obj
  289.      *            Object to encode
  290.      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
  291.      * @throws EncoderException
  292.      *             if the parameter supplied is not of type byte[]
  293.      */
  294.     @Override
  295.     public Object encode(final Object obj) throws EncoderException {
  296.         if (!(obj instanceof byte[])) {
  297.             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
  298.         }
  299.         return encode((byte[]) obj);
  300.     }

  301.     /**
  302.      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
  303.      * Uses UTF8 encoding.
  304.      *
  305.      * @param pArray
  306.      *            a byte array containing binary data
  307.      * @return A String containing only Base-N character data
  308.      */
  309.     public String encodeToString(final byte[] pArray) {
  310.         return StringUtils.newStringUtf8(encode(pArray));
  311.     }

  312.     /**
  313.      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
  314.      * Uses UTF8 encoding.
  315.      *
  316.      * @param pArray a byte array containing binary data
  317.      * @return String containing only character data in the appropriate alphabet.
  318.      * @since 1.5
  319.      * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
  320.     */
  321.     public String encodeAsString(final byte[] pArray){
  322.         return StringUtils.newStringUtf8(encode(pArray));
  323.     }

  324.     /**
  325.      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
  326.      * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
  327.      *
  328.      * @param obj
  329.      *            Object to decode
  330.      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
  331.      *         supplied.
  332.      * @throws DecoderException
  333.      *             if the parameter supplied is not of type byte[]
  334.      */
  335.     @Override
  336.     public Object decode(final Object obj) throws DecoderException {
  337.         if (obj instanceof byte[]) {
  338.             return decode((byte[]) obj);
  339.         } else if (obj instanceof String) {
  340.             return decode((String) obj);
  341.         } else {
  342.             throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
  343.         }
  344.     }

  345.     /**
  346.      * Decodes a String containing characters in the Base-N alphabet.
  347.      *
  348.      * @param pArray
  349.      *            A String containing Base-N character data
  350.      * @return a byte array containing binary data
  351.      */
  352.     public byte[] decode(final String pArray) {
  353.         return decode(StringUtils.getBytesUtf8(pArray));
  354.     }

  355.     /**
  356.      * Decodes a byte[] containing characters in the Base-N alphabet.
  357.      *
  358.      * @param pArray
  359.      *            A byte array containing Base-N character data
  360.      * @return a byte array containing binary data
  361.      */
  362.     @Override
  363.     public byte[] decode(final byte[] pArray) {
  364.         if (pArray == null || pArray.length == 0) {
  365.             return pArray;
  366.         }
  367.         final Context context = new Context();
  368.         decode(pArray, 0, pArray.length, context);
  369.         decode(pArray, 0, EOF, context); // Notify decoder of EOF.
  370.         final byte[] result = new byte[context.pos];
  371.         readResults(result, 0, result.length, context);
  372.         return result;
  373.     }

  374.     /**
  375.      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
  376.      *
  377.      * @param pArray
  378.      *            a byte array containing binary data
  379.      * @return A byte array containing only the base N alphabetic character data
  380.      */
  381.     @Override
  382.     public byte[] encode(final byte[] pArray) {
  383.         if (pArray == null || pArray.length == 0) {
  384.             return pArray;
  385.         }
  386.         return encode(pArray, 0, pArray.length);
  387.     }

  388.     /**
  389.      * Encodes a byte[] containing binary data, into a byte[] containing
  390.      * characters in the alphabet.
  391.      *
  392.      * @param pArray
  393.      *            a byte array containing binary data
  394.      * @param offset
  395.      *            initial offset of the subarray.
  396.      * @param length
  397.      *            length of the subarray.
  398.      * @return A byte array containing only the base N alphabetic character data
  399.      * @since 1.11
  400.      */
  401.     public byte[] encode(final byte[] pArray, final int offset, final int length) {
  402.         if (pArray == null || pArray.length == 0) {
  403.             return pArray;
  404.         }
  405.         final Context context = new Context();
  406.         encode(pArray, offset, length, context);
  407.         encode(pArray, offset, EOF, context); // Notify encoder of EOF.
  408.         final byte[] buf = new byte[context.pos - context.readPos];
  409.         readResults(buf, 0, buf.length, context);
  410.         return buf;
  411.     }

    /**
     * Streaming encode step, implemented by the concrete codec.
     * <p>
     * The one-shot {@code encode} methods of this class call it once with the input range and once more
     * with {@code length} set to {@code EOF} to signal end of input, then collect the output from
     * {@code context} through {@code readResults}.
     * </p>
     *
     * @param pArray byte array holding the input
     * @param i index in {@code pArray} of the first byte to process
     * @param length number of bytes to process, or {@code EOF} to signal that the input has ended
     * @param context the context to be used
     */
    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);

    /**
     * Streaming decode step, implemented by the concrete codec.
     * <p>
     * The one-shot {@code decode} method of this class calls it once with the whole input and once more
     * with {@code length} set to {@code EOF} to signal end of input, then collects the output from
     * {@code context} through {@code readResults}.
     * </p>
     *
     * @param pArray byte array holding the input
     * @param i index in {@code pArray} of the first byte to process
     * @param length number of bytes to process, or {@code EOF} to signal that the input has ended
     * @param context the context to be used
     */
    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);

    /**
     * Returns whether or not the <code>value</code> is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

  425.     /**
  426.      * Tests a given byte array to see if it contains only valid characters within the alphabet.
  427.      * The method optionally treats whitespace and pad as valid.
  428.      *
  429.      * @param arrayOctet byte array to test
  430.      * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
  431.      *
  432.      * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
  433.      *         <code>false</code>, otherwise
  434.      */
  435.     public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
  436.         for (final byte octet : arrayOctet) {
  437.             if (!isInAlphabet(octet) &&
  438.                     (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
  439.                 return false;
  440.             }
  441.         }
  442.         return true;
  443.     }

  444.     /**
  445.      * Tests a given String to see if it contains only valid characters within the alphabet.
  446.      * The method treats whitespace and PAD as valid.
  447.      *
  448.      * @param basen String to test
  449.      * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
  450.      *         the String is empty; <code>false</code>, otherwise
  451.      * @see #isInAlphabet(byte[], boolean)
  452.      */
  453.     public boolean isInAlphabet(final String basen) {
  454.         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
  455.     }

  456.     /**
  457.      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
  458.      *
  459.      * Intended for use in checking line-ending arrays
  460.      *
  461.      * @param arrayOctet
  462.      *            byte array to test
  463.      * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
  464.      */
  465.     protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
  466.         if (arrayOctet == null) {
  467.             return false;
  468.         }
  469.         for (final byte element : arrayOctet) {
  470.             if (pad == element || isInAlphabet(element)) {
  471.                 return true;
  472.             }
  473.         }
  474.         return false;
  475.     }

  476.     /**
  477.      * Calculates the amount of space needed to encode the supplied array.
  478.      *
  479.      * @param pArray byte[] array which will later be encoded
  480.      *
  481.      * @return amount of space needed to encoded the supplied array.
  482.      * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
  483.      */
  484.     public long getEncodedLength(final byte[] pArray) {
  485.         // Calculate non-chunked size - rounded up to allow for padding
  486.         // cast to long is needed to avoid possibility of overflow
  487.         long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
  488.         if (lineLength > 0) { // We're using chunking
  489.             // Round up to nearest multiple
  490.             len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
  491.         }
  492.         return len;
  493.     }
  494. }