StringSubstitutorReader.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.text.io;

  18. import java.io.FilterReader;
  19. import java.io.IOException;
  20. import java.io.Reader;

  21. import org.apache.commons.text.StringSubstitutor;
  22. import org.apache.commons.text.TextStringBuilder;
  23. import org.apache.commons.text.matcher.StringMatcher;
  24. import org.apache.commons.text.matcher.StringMatcherFactory;

  25. /**
  26.  * A {@link Reader} that performs string substitution on a source {@code Reader} using a {@link StringSubstitutor}.
  27.  *
  28.  * <p>
  29.  * Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for
  30.  * example, when a Servlet filters a file to a client.
  31.  * </p>
  32.  * <p>
  33.  * This class is not thread-safe.
  34.  * </p>
  35.  *
  36.  * @since 1.9
  37.  */
  38. public class StringSubstitutorReader extends FilterReader {

  39.     /** The end-of-stream character marker. */
  40.     private static final int EOS = -1;

  41.     /** Our internal buffer. */
  42.     private final TextStringBuilder buffer = new TextStringBuilder();

  43.     /** End-of-Stream flag. */
  44.     private boolean eos;

  45.     /** Matches escaped variable starts. */
  46.     private final StringMatcher prefixEscapeMatcher;

  47.     /** Internal buffer for {@link #read()} method. */
  48.     private final char[] read1CharBuffer = {0};

  49.     /** The underlying StringSubstitutor. */
  50.     private final StringSubstitutor stringSubstitutor;

  51.     /** We don't always want to drain the whole buffer. */
  52.     private int toDrain;

  53.     /**
  54.      * Constructs a new instance.
  55.      *
  56.      * @param reader the underlying reader containing the template text known to the given {@code StringSubstitutor}.
  57.      * @param stringSubstitutor How to replace as we read.
  58.      * @throws NullPointerException if {@code reader} is {@code null}.
  59.      * @throws NullPointerException if {@code stringSubstitutor} is {@code null}.
  60.      */
  61.     public StringSubstitutorReader(final Reader reader, final StringSubstitutor stringSubstitutor) {
  62.         super(reader);
  63.         this.stringSubstitutor = new StringSubstitutor(stringSubstitutor);
  64.         this.prefixEscapeMatcher = StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar())
  65.             .andThen(stringSubstitutor.getVariablePrefixMatcher());
  66.     }

  67.     /**
  68.      * Buffers the requested number of characters if available.
  69.      */
  70.     private int buffer(final int requestReadCount) throws IOException {
  71.         final int actualReadCount = buffer.readFrom(super.in, requestReadCount);
  72.         eos = actualReadCount == EOS;
  73.         return actualReadCount;
  74.     }

  75.     /**
  76.      * Reads a requested number of chars from the underlying reader into the buffer. On EOS, set the state is DRAINING,
  77.      * drain, and return a drain count, otherwise, returns the actual read count.
  78.      */
  79.     private int bufferOrDrainOnEos(final int requestReadCount, final char[] target, final int targetIndex,
  80.         final int targetLength) throws IOException {
  81.         final int actualReadCount = buffer(requestReadCount);
  82.         return drainOnEos(actualReadCount, target, targetIndex, targetLength);
  83.     }

  84.     /**
  85.      * Drains characters from our buffer to the given {@code target}.
  86.      */
  87.     private int drain(final char[] target, final int targetIndex, final int targetLength) {
  88.         final int actualLen = Math.min(buffer.length(), targetLength);
  89.         final int drainCount = buffer.drainChars(0, actualLen, target, targetIndex);
  90.         toDrain -= drainCount;
  91.         if (buffer.isEmpty() || toDrain == 0) {
  92.             // nothing or everything drained.
  93.             toDrain = 0;
  94.         }
  95.         return drainCount;
  96.     }

  97.     /**
  98.      * Drains from the buffer to the target only if we are at EOS per the input count. If input count is EOS, drain and
  99.      * returns the drain count, otherwise return the input count. If draining, the state is set to DRAINING.
  100.      */
  101.     private int drainOnEos(final int readCountOrEos, final char[] target, final int targetIndex,
  102.         final int targetLength) {
  103.         if (readCountOrEos == EOS) {
  104.             // At EOS, drain.
  105.             if (buffer.isNotEmpty()) {
  106.                 toDrain = buffer.size();
  107.                 return drain(target, targetIndex, targetLength);
  108.             }
  109.             return EOS;
  110.         }
  111.         return readCountOrEos;
  112.     }

  113.     /**
  114.      * Tests if our buffer matches the given string matcher at the given position in the buffer.
  115.      */
  116.     private boolean isBufferMatchAt(final StringMatcher stringMatcher, final int pos) {
  117.         return stringMatcher.isMatch(buffer, pos) == stringMatcher.size();
  118.     }

  119.     /**
  120.      * Tests if we are draining.
  121.      */
  122.     private boolean isDraining() {
  123.         return toDrain > 0;
  124.     }

  125.     /**
  126.      * Reads a single character.
  127.      *
  128.      * @return a character as an {@code int} or {@code -1} for end-of-stream.
  129.      * @throws IOException If an I/O error occurs
  130.      */
  131.     @Override
  132.     public int read() throws IOException {
  133.         int count = 0;
  134.         // ask until we get a char or EOS
  135.         do {
  136.             count = read(read1CharBuffer, 0, 1);
  137.             if (count == EOS) {
  138.                 return EOS;
  139.             }
  140.             // keep on buffering
  141.         } while (count < 1);
  142.         return read1CharBuffer[0];
  143.     }

  144.     /**
  145.      * Reads characters into a portion of an array.
  146.      *
  147.      * @param target Target buffer.
  148.      * @param targetIndexIn Index in the target at which to start storing characters.
  149.      * @param targetLengthIn Maximum number of characters to read.
  150.      * @return The number of characters read, or -1 on end of stream.
  151.      * @throws IOException If an I/O error occurs
  152.      */
  153.     @Override
  154.     public int read(final char[] target, final int targetIndexIn, final int targetLengthIn) throws IOException {
  155.         // The whole thing is inefficient because we must look for a balanced suffix to match the starting prefix
  156.         // Trying to substitute an incomplete expression can perform replacements when it should not.
  157.         // At a high level:
  158.         // - if draining, drain until empty or target length hit
  159.         // - copy to target until we find a variable start
  160.         // - buffer until a balanced suffix is read, then substitute.
  161.         if (eos && buffer.isEmpty()) {
  162.             return EOS;
  163.         }
  164.         if (targetLengthIn <= 0) {
  165.             // short-circuit: ask nothing, give nothing
  166.             return 0;
  167.         }
  168.         // drain check
  169.         int targetIndex = targetIndexIn;
  170.         int targetLength = targetLengthIn;
  171.         if (isDraining()) {
  172.             // drain as much as possible
  173.             final int drainCount = drain(target, targetIndex, Math.min(toDrain, targetLength));
  174.             if (drainCount == targetLength) {
  175.                 // drained length requested, target is full, can only do more in the next invocation
  176.                 return targetLength;
  177.             }
  178.             // drained less than requested, target not full.
  179.             targetIndex += drainCount;
  180.             targetLength -= drainCount;
  181.         }
  182.         // BUFFER from the underlying reader
  183.         final int minReadLenPrefix = prefixEscapeMatcher.size();
  184.         // READ enough to test for an [optionally escaped] variable start
  185.         int readCount = buffer(readCount(minReadLenPrefix, 0));
  186.         if (buffer.length() < minReadLenPrefix && targetLength < minReadLenPrefix) {
  187.             // read less than minReadLenPrefix, no variable possible
  188.             final int drainCount = drain(target, targetIndex, targetLength);
  189.             targetIndex += drainCount;
  190.             final int targetSize = targetIndex - targetIndexIn;
  191.             return eos && targetSize <= 0 ? EOS : targetSize;
  192.         }
  193.         if (eos) {
  194.             // EOS
  195.             stringSubstitutor.replaceIn(buffer);
  196.             toDrain = buffer.size();
  197.             final int drainCount = drain(target, targetIndex, targetLength);
  198.             targetIndex += drainCount;
  199.             final int targetSize = targetIndex - targetIndexIn;
  200.             return eos && targetSize <= 0 ? EOS : targetSize;
  201.         }
  202.         // PREFIX
  203.         // buffer and drain until we find a variable start, escaped or plain.
  204.         int balance = 0;
  205.         final StringMatcher prefixMatcher = stringSubstitutor.getVariablePrefixMatcher();
  206.         int pos = 0;
  207.         while (targetLength > 0) {
  208.             if (isBufferMatchAt(prefixMatcher, 0)) {
  209.                 balance = 1;
  210.                 pos = prefixMatcher.size();
  211.                 break;
  212.             }
  213.             if (isBufferMatchAt(prefixEscapeMatcher, 0)) {
  214.                 balance = 1;
  215.                 pos = prefixEscapeMatcher.size();
  216.                 break;
  217.             }
  218.             // drain first char
  219.             final int drainCount = drain(target, targetIndex, 1);
  220.             targetIndex += drainCount;
  221.             targetLength -= drainCount;
  222.             if (buffer.size() < minReadLenPrefix) {
  223.                 readCount = bufferOrDrainOnEos(minReadLenPrefix, target, targetIndex, targetLength);
  224.                 if (eos || isDraining()) {
  225.                     // if draining, readCount is a drain count
  226.                     if (readCount != EOS) {
  227.                         targetIndex += readCount;
  228.                         targetLength -= readCount;
  229.                     }
  230.                     final int actual = targetIndex - targetIndexIn;
  231.                     return actual > 0 ? actual : EOS;
  232.                 }
  233.             }
  234.         }
  235.         // we found a variable start
  236.         if (targetLength <= 0) {
  237.             // no more room in target
  238.             return targetLengthIn;
  239.         }
  240.         // SUFFIX
  241.         // buffer more to find a balanced suffix
  242.         final StringMatcher suffixMatcher = stringSubstitutor.getVariableSuffixMatcher();
  243.         final int minReadLenSuffix = Math.max(minReadLenPrefix, suffixMatcher.size());
  244.         readCount = buffer(readCount(minReadLenSuffix, pos));
  245.         if (eos) {
  246.             // EOS
  247.             stringSubstitutor.replaceIn(buffer);
  248.             toDrain = buffer.size();
  249.             final int drainCount = drain(target, targetIndex, targetLength);
  250.             return targetIndex + drainCount - targetIndexIn;
  251.         }
  252.         // buffer and break out when we find the end or a balanced suffix
  253.         while (true) {
  254.             if (isBufferMatchAt(suffixMatcher, pos)) {
  255.                 balance--;
  256.                 pos++;
  257.                 if (balance == 0) {
  258.                     break;
  259.                 }
  260.             } else if (isBufferMatchAt(prefixMatcher, pos)) {
  261.                 balance++;
  262.                 pos += prefixMatcher.size();
  263.             } else if (isBufferMatchAt(prefixEscapeMatcher, pos)) {
  264.                 balance++;
  265.                 pos += prefixEscapeMatcher.size();
  266.             } else {
  267.                 pos++;
  268.             }
  269.             readCount = buffer(readCount(minReadLenSuffix, pos));
  270.             if (readCount == EOS && pos >= buffer.size()) {
  271.                 break;
  272.             }
  273.         }
  274.         // substitute
  275.         final int endPos = pos + 1;
  276.         final int leftover = Math.max(0, buffer.size() - pos);
  277.         stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), endPos));
  278.         pos = buffer.size() - leftover;
  279.         final int drainLen = Math.min(targetLength, pos);
  280.         // only drain up to what we've substituted
  281.         toDrain = pos;
  282.         drain(target, targetIndex, drainLen);
  283.         return targetIndex - targetIndexIn + drainLen;
  284.     }

  285.     /**
  286.      * Returns how many chars to attempt reading to have room in the buffer for {@code count} chars starting at position
  287.      * {@code pos}.
  288.      */
  289.     private int readCount(final int count, final int pos) {
  290.         final int avail = buffer.size() - pos;
  291.         return avail >= count ? 0 : count - avail;
  292.     }

  293. }