View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.text.io;
19  
20  import java.io.FilterReader;
21  import java.io.IOException;
22  import java.io.Reader;
23  
24  import org.apache.commons.text.StringSubstitutor;
25  import org.apache.commons.text.TextStringBuilder;
26  import org.apache.commons.text.matcher.StringMatcher;
27  import org.apache.commons.text.matcher.StringMatcherFactory;
28  
29  /**
30   * A {@link Reader} that performs string substitution on a source {@code Reader} using a {@link StringSubstitutor}.
31   *
32   * <p>
33   * Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for
34   * example, when a Servlet filters a file to a client.
35   * </p>
36   * <p>
37   * This class is not thread-safe.
38   * </p>
39   *
40   * @since 1.9
41   */
42  public class StringSubstitutorReader extends FilterReader {
43  
44      /** The end-of-stream character marker. */
45      private static final int EOS = -1;
46  
47      /** Our internal buffer. */
48      private final TextStringBuilder buffer = new TextStringBuilder();
49  
50      /** End-of-Stream flag. */
51      private boolean eos;
52  
53      /** Matches escaped variable starts. */
54      private final StringMatcher prefixEscapeMatcher;
55  
56      /** Internal buffer for {@link #read()} method. */
57      private final char[] read1CharBuffer = {0};
58  
59      /** The underlying StringSubstitutor. */
60      private final StringSubstitutor stringSubstitutor;
61  
62      /** We don't always want to drain the whole buffer. */
63      private int toDrain;
64  
65      /**
66       * Constructs a new instance.
67       *
68       * @param reader the underlying reader containing the template text known to the given {@code StringSubstitutor}.
69       * @param stringSubstitutor How to replace as we read.
70       * @throws NullPointerException if {@code reader} is {@code null}.
71       * @throws NullPointerException if {@code stringSubstitutor} is {@code null}.
72       */
73      public StringSubstitutorReader(final Reader reader, final StringSubstitutor stringSubstitutor) {
74          super(reader);
75          this.stringSubstitutor = new StringSubstitutor(stringSubstitutor);
76          this.prefixEscapeMatcher = StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar())
77              .andThen(stringSubstitutor.getVariablePrefixMatcher());
78      }
79  
80      /**
81       * Buffers the requested number of characters if available.
82       */
83      private int buffer(final int requestReadCount) throws IOException {
84          final int actualReadCount = buffer.readFrom(super.in, requestReadCount);
85          eos = actualReadCount == EOS;
86          return actualReadCount;
87      }
88  
89      /**
90       * Reads a requested number of chars from the underlying reader into the buffer. On EOS, set the state is DRAINING,
91       * drain, and return a drain count, otherwise, returns the actual read count.
92       */
93      private int bufferOrDrainOnEos(final int requestReadCount, final char[] target, final int targetIndex,
94          final int targetLength) throws IOException {
95          final int actualReadCount = buffer(requestReadCount);
96          return drainOnEos(actualReadCount, target, targetIndex, targetLength);
97      }
98  
99      /**
100      * Drains characters from our buffer to the given {@code target}.
101      */
102     private int drain(final char[] target, final int targetIndex, final int targetLength) {
103         final int actualLen = Math.min(buffer.length(), targetLength);
104         final int drainCount = buffer.drainChars(0, actualLen, target, targetIndex);
105         toDrain -= drainCount;
106         if (buffer.isEmpty() || toDrain == 0) {
107             // nothing or everything drained.
108             toDrain = 0;
109         }
110         return drainCount;
111     }
112 
113     /**
114      * Drains from the buffer to the target only if we are at EOS per the input count. If input count is EOS, drain and
115      * returns the drain count, otherwise return the input count. If draining, the state is set to DRAINING.
116      */
117     private int drainOnEos(final int readCountOrEos, final char[] target, final int targetIndex,
118         final int targetLength) {
119         if (readCountOrEos == EOS) {
120             // At EOS, drain.
121             if (buffer.isNotEmpty()) {
122                 toDrain = buffer.size();
123                 return drain(target, targetIndex, targetLength);
124             }
125             return EOS;
126         }
127         return readCountOrEos;
128     }
129 
130     /**
131      * Tests if our buffer matches the given string matcher at the given position in the buffer.
132      */
133     private boolean isBufferMatchAt(final StringMatcher stringMatcher, final int pos) {
134         return stringMatcher.isMatch(buffer, pos) == stringMatcher.size();
135     }
136 
137     /**
138      * Tests if we are draining.
139      */
140     private boolean isDraining() {
141         return toDrain > 0;
142     }
143 
144     /**
145      * Reads a single character.
146      *
147      * @return a character as an {@code int} or {@code -1} for end-of-stream.
148      * @throws IOException If an I/O error occurs
149      */
150     @Override
151     public int read() throws IOException {
152         int count = 0;
153         // ask until we get a char or EOS
154         do {
155             count = read(read1CharBuffer, 0, 1);
156             if (count == EOS) {
157                 return EOS;
158             }
159             // keep on buffering
160         } while (count < 1);
161         return read1CharBuffer[0];
162     }
163 
164     /**
165      * Reads characters into a portion of an array.
166      *
167      * @param target Target buffer.
168      * @param targetIndexIn Index in the target at which to start storing characters.
169      * @param targetLengthIn Maximum number of characters to read.
170      *
171      * @return The number of characters read, or -1 on end of stream.
172      * @throws IOException If an I/O error occurs
173      */
174     @Override
175     public int read(final char[] target, final int targetIndexIn, final int targetLengthIn) throws IOException {
176         // The whole thing is inefficient because we must look for a balanced suffix to match the starting prefix
177         // Trying to substitute an incomplete expression can perform replacements when it should not.
178         // At a high level:
179         // - if draining, drain until empty or target length hit
180         // - copy to target until we find a variable start
181         // - buffer until a balanced suffix is read, then substitute.
182         if (eos && buffer.isEmpty()) {
183             return EOS;
184         }
185         if (targetLengthIn <= 0) {
186             // short-circuit: ask nothing, give nothing
187             return 0;
188         }
189         // drain check
190         int targetIndex = targetIndexIn;
191         int targetLength = targetLengthIn;
192         if (isDraining()) {
193             // drain as much as possible
194             final int drainCount = drain(target, targetIndex, Math.min(toDrain, targetLength));
195             if (drainCount == targetLength) {
196                 // drained length requested, target is full, can only do more in the next invocation
197                 return targetLength;
198             }
199             // drained less than requested, target not full.
200             targetIndex += drainCount;
201             targetLength -= drainCount;
202         }
203         // BUFFER from the underlying reader
204         final int minReadLenPrefix = prefixEscapeMatcher.size();
205         // READ enough to test for an [optionally escaped] variable start
206         int readCount = buffer(readCount(minReadLenPrefix, 0));
207         if (buffer.length() < minReadLenPrefix && targetLength < minReadLenPrefix) {
208             // read less than minReadLenPrefix, no variable possible
209             final int drainCount = drain(target, targetIndex, targetLength);
210             targetIndex += drainCount;
211             final int targetSize = targetIndex - targetIndexIn;
212             return eos && targetSize <= 0 ? EOS : targetSize;
213         }
214         if (eos) {
215             // EOS
216             stringSubstitutor.replaceIn(buffer);
217             toDrain = buffer.size();
218             final int drainCount = drain(target, targetIndex, targetLength);
219             targetIndex += drainCount;
220             final int targetSize = targetIndex - targetIndexIn;
221             return eos && targetSize <= 0 ? EOS : targetSize;
222         }
223         // PREFIX
224         // buffer and drain until we find a variable start, escaped or plain.
225         int balance = 0;
226         final StringMatcher prefixMatcher = stringSubstitutor.getVariablePrefixMatcher();
227         int pos = 0;
228         while (targetLength > 0) {
229             if (isBufferMatchAt(prefixMatcher, 0)) {
230                 balance = 1;
231                 pos = prefixMatcher.size();
232                 break;
233             }
234             if (isBufferMatchAt(prefixEscapeMatcher, 0)) {
235                 balance = 1;
236                 pos = prefixEscapeMatcher.size();
237                 break;
238             }
239             // drain first char
240             final int drainCount = drain(target, targetIndex, 1);
241             targetIndex += drainCount;
242             targetLength -= drainCount;
243             if (buffer.size() < minReadLenPrefix) {
244                 readCount = bufferOrDrainOnEos(minReadLenPrefix, target, targetIndex, targetLength);
245                 if (eos || isDraining()) {
246                     // if draining, readCount is a drain count
247                     if (readCount != EOS) {
248                         targetIndex += readCount;
249                         targetLength -= readCount;
250                     }
251                     final int actual = targetIndex - targetIndexIn;
252                     return actual > 0 ? actual : EOS;
253                 }
254             }
255         }
256         // we found a variable start
257         if (targetLength <= 0) {
258             // no more room in target
259             return targetLengthIn;
260         }
261         // SUFFIX
262         // buffer more to find a balanced suffix
263         final StringMatcher suffixMatcher = stringSubstitutor.getVariableSuffixMatcher();
264         final int minReadLenSuffix = Math.max(minReadLenPrefix, suffixMatcher.size());
265         readCount = buffer(readCount(minReadLenSuffix, pos));
266         if (eos) {
267             // EOS
268             stringSubstitutor.replaceIn(buffer);
269             toDrain = buffer.size();
270             final int drainCount = drain(target, targetIndex, targetLength);
271             return targetIndex + drainCount - targetIndexIn;
272         }
273         // buffer and break out when we find the end or a balanced suffix
274         while (true) {
275             if (isBufferMatchAt(suffixMatcher, pos)) {
276                 balance--;
277                 pos++;
278                 if (balance == 0) {
279                     break;
280                 }
281             } else if (isBufferMatchAt(prefixMatcher, pos)) {
282                 balance++;
283                 pos += prefixMatcher.size();
284             } else if (isBufferMatchAt(prefixEscapeMatcher, pos)) {
285                 balance++;
286                 pos += prefixEscapeMatcher.size();
287             } else {
288                 pos++;
289             }
290             readCount = buffer(readCount(minReadLenSuffix, pos));
291             if (readCount == EOS && pos >= buffer.size()) {
292                 break;
293             }
294         }
295         // substitute
296         final int endPos = pos + 1;
297         final int leftover = Math.max(0, buffer.size() - pos);
298         stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), endPos));
299         pos = buffer.size() - leftover;
300         final int drainLen = Math.min(targetLength, pos);
301         // only drain up to what we've substituted
302         toDrain = pos;
303         drain(target, targetIndex, drainLen);
304         return targetIndex - targetIndexIn + drainLen;
305     }
306 
307     /**
308      * Returns how many chars to attempt reading to have room in the buffer for {@code count} chars starting at position
309      * {@code pos}.
310      */
311     private int readCount(final int count, final int pos) {
312         final int avail = buffer.size() - pos;
313         return avail >= count ? 0 : count - avail;
314     }
315 
316 }