View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.text.io;
19  
20  import java.io.FilterReader;
21  import java.io.IOException;
22  import java.io.Reader;
23  
24  import org.apache.commons.text.StringSubstitutor;
25  import org.apache.commons.text.TextStringBuilder;
26  import org.apache.commons.text.matcher.StringMatcher;
27  import org.apache.commons.text.matcher.StringMatcherFactory;
28  
29  /**
30   * A {@link Reader} that performs string substitution on a source {@code Reader} using a {@link StringSubstitutor}.
31   *
32   * <p>
33   * Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for
34   * example, when a Servlet filters a file to a client.
35   * </p>
36   * <p>
37   * This class is not thread-safe.
38   * </p>
39   *
40   * @since 1.9
41   */
42  public class StringSubstitutorReader extends FilterReader {
43  
    /** The end-of-stream marker: the value returned by the read methods when no more characters are available. */
    private static final int EOS = -1;

    /**
     * Our internal buffer: characters read from the underlying reader are held here until they have been inspected,
     * substituted where applicable, and drained to the caller's target array.
     */
    private final TextStringBuilder buffer = new TextStringBuilder();

    /**
     * End-of-Stream flag: updated by every call to {@code buffer(int)} and {@code true} only when that call's read
     * returned {@link #EOS}.
     */
    private boolean eos;

    /** Matches escaped variable starts, that is, the escape character followed by the variable prefix. */
    private final StringMatcher prefixEscapeMatcher;

    /** One-element scratch array reused by the {@link #read()} method so it can delegate to the array-based read. */
    private final char[] read1CharBuffer = {0};

    /** The underlying StringSubstitutor; a private copy of the instance given to the constructor. */
    private final StringSubstitutor stringSubstitutor;

    /**
     * How many characters at the start of {@link #buffer} are ready to be handed to the caller. We don't always want
     * to drain the whole buffer: characters after this count have not been inspected for variables yet. A value greater
     * than zero means we are draining.
     */
    private int toDrain;
64  
65      /**
66       * Constructs a new instance.
67       *
68       * @param reader the underlying reader containing the template text known to the given {@code StringSubstitutor}.
69       * @param stringSubstitutor How to replace as we read.
70       * @throws NullPointerException if {@code reader} is {@code null}.
71       * @throws NullPointerException if {@code stringSubstitutor} is {@code null}.
72       */
73      public StringSubstitutorReader(final Reader reader, final StringSubstitutor stringSubstitutor) {
74          super(reader);
75          this.stringSubstitutor = new StringSubstitutor(stringSubstitutor);
76          this.prefixEscapeMatcher = StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar())
77              .andThen(stringSubstitutor.getVariablePrefixMatcher());
78      }
79  
80      /**
81       * Buffers the requested number of characters if available.
82       */
83      private int buffer(final int requestReadCount) throws IOException {
84          final int actualReadCount = buffer.readFrom(super.in, requestReadCount);
85          eos = actualReadCount == EOS;
86          return actualReadCount;
87      }
88  
89      /**
90       * Reads a requested number of chars from the underlying reader into the buffer. On EOS, set the state is DRAINING,
91       * drain, and return a drain count, otherwise, returns the actual read count.
92       */
93      private int bufferOrDrainOnEos(final int requestReadCount, final char[] target, final int targetIndex,
94          final int targetLength) throws IOException {
95          final int actualReadCount = buffer(requestReadCount);
96          return drainOnEos(actualReadCount, target, targetIndex, targetLength);
97      }
98  
99      /**
100      * Drains characters from our buffer to the given {@code target}.
101      */
102     private int drain(final char[] target, final int targetIndex, final int targetLength) {
103         final int actualLen = Math.min(buffer.length(), targetLength);
104         final int drainCount = buffer.drainChars(0, actualLen, target, targetIndex);
105         toDrain -= drainCount;
106         if (buffer.isEmpty() || toDrain == 0) {
107             // nothing or everything drained.
108             toDrain = 0;
109         }
110         return drainCount;
111     }
112 
113     /**
114      * Drains from the buffer to the target only if we are at EOS per the input count. If input count is EOS, drain and
115      * returns the drain count, otherwise return the input count. If draining, the state is set to DRAINING.
116      */
117     private int drainOnEos(final int readCountOrEos, final char[] target, final int targetIndex,
118         final int targetLength) {
119         if (readCountOrEos == EOS) {
120             // At EOS, drain.
121             if (buffer.isNotEmpty()) {
122                 toDrain = buffer.size();
123                 return drain(target, targetIndex, targetLength);
124             }
125             return EOS;
126         }
127         return readCountOrEos;
128     }
129 
130     /**
131      * Tests if our buffer matches the given string matcher at the given position in the buffer.
132      */
133     private boolean isBufferMatchAt(final StringMatcher stringMatcher, final int pos) {
134         return stringMatcher.isMatch(buffer, pos) == stringMatcher.size();
135     }
136 
137     /**
138      * Tests if we are draining.
139      */
140     private boolean isDraining() {
141         return toDrain > 0;
142     }
143 
144     /**
145      * Reads a single character.
146      *
147      * @return a character as an {@code int} or {@code -1} for end-of-stream.
148      * @throws IOException If an I/O error occurs
149      */
150     @Override
151     public int read() throws IOException {
152         int count = 0;
153         // ask until we get a char or EOS
154         do {
155             count = read(read1CharBuffer, 0, 1);
156             if (count == EOS) {
157                 return EOS;
158             }
159             // keep on buffering
160         } while (count < 1);
161         return read1CharBuffer[0];
162     }
163 
    /**
     * Reads characters into a portion of an array, substituting variables as the text streams through.
     *
     * <p>
     * A call works through the following steps and returns as soon as one of them completes the request:
     * </p>
     * <ol>
     * <li>Drain: hand out characters left over from a previous call that are already marked as drainable.</li>
     * <li>Prefix: copy plain characters to {@code target} one at a time until the start of the buffer matches a
     * variable prefix, plain or escaped.</li>
     * <li>Suffix: keep buffering and scanning until the prefix is balanced by a suffix, or the input ends.</li>
     * <li>Substitute: run the {@code StringSubstitutor} on the scanned part of the buffer and drain the result.</li>
     * </ol>
     *
     * @param target Target buffer.
     * @param targetIndexIn Index in the target at which to start storing characters.
     * @param targetLengthIn Maximum number of characters to read.
     * @return The number of characters read, or -1 on end of stream. Returns 0 when {@code targetLengthIn} is not
     *         positive and the end of stream has not been reached.
     * @throws IOException If an I/O error occurs
     */
    @Override
    public int read(final char[] target, final int targetIndexIn, final int targetLengthIn) throws IOException {
        // The whole thing is inefficient because we must look for a balanced suffix to match the starting prefix
        // Trying to substitute an incomplete expression can perform replacements when it should not.
        // At a high level:
        // - if draining, drain until empty or target length hit
        // - copy to target until we find a variable start
        // - buffer until a balanced suffix is read, then substitute.
        if (eos && buffer.isEmpty()) {
            // the last read of the underlying reader reported EOS and nothing is left to hand out
            return EOS;
        }
        if (targetLengthIn <= 0) {
            // short-circuit: ask nothing, give nothing
            return 0;
        }
        // drain check
        // targetIndex/targetLength track the next free slot and the remaining room in target during this call
        int targetIndex = targetIndexIn;
        int targetLength = targetLengthIn;
        if (isDraining()) {
            // drain as much as possible, but never more than what is marked as drainable
            final int drainCount = drain(target, targetIndex, Math.min(toDrain, targetLength));
            if (drainCount == targetLength) {
                // drained length requested, target is full, can only do more in the next invocation
                return targetLength;
            }
            // drained less than requested, target not full.
            targetIndex += drainCount;
            targetLength -= drainCount;
        }
        // BUFFER from the underlying reader
        // the escaped prefix (escape char followed by the variable prefix) is the longest variable start we test for
        final int minReadLenPrefix = prefixEscapeMatcher.size();
        // READ enough to test for an [optionally escaped] variable start
        int readCount = buffer(readCount(minReadLenPrefix, 0));
        if (buffer.length() < minReadLenPrefix && targetLength < minReadLenPrefix) {
            // read less than minReadLenPrefix, no variable possible
            final int drainCount = drain(target, targetIndex, targetLength);
            targetIndex += drainCount;
            // total written during this call, including anything drained above
            final int targetSize = targetIndex - targetIndexIn;
            return eos && targetSize <= 0 ? EOS : targetSize;
        }
        if (eos) {
            // EOS
            // the read above reported EOS, so the buffer holds all that is left: substitute over all of it and drain
            stringSubstitutor.replaceIn(buffer);
            toDrain = buffer.size();
            final int drainCount = drain(target, targetIndex, targetLength);
            targetIndex += drainCount;
            final int targetSize = targetIndex - targetIndexIn;
            return eos && targetSize <= 0 ? EOS : targetSize;
        }
        // PREFIX
        // buffer and drain until we find a variable start, escaped or plain.
        // balance counts the variable starts seen that have not been closed by a suffix yet
        int balance = 0;
        final StringMatcher prefixMatcher = stringSubstitutor.getVariablePrefixMatcher();
        // pos is the scan position in the buffer; once a start is found it points just past that start
        int pos = 0;
        while (targetLength > 0) {
            if (isBufferMatchAt(prefixMatcher, 0)) {
                balance = 1;
                pos = prefixMatcher.size();
                break;
            }
            if (isBufferMatchAt(prefixEscapeMatcher, 0)) {
                // an escaped start is tracked like a plain one here: it opens a balance that the suffix scan closes
                balance = 1;
                pos = prefixEscapeMatcher.size();
                break;
            }
            // drain first char
            // no variable start at the front of the buffer, so its first char is plain text
            final int drainCount = drain(target, targetIndex, 1);
            targetIndex += drainCount;
            targetLength -= drainCount;
            if (buffer.size() < minReadLenPrefix) {
                // top the buffer up again; on EOS the helper drains what is buffered into target
                readCount = bufferOrDrainOnEos(minReadLenPrefix, target, targetIndex, targetLength);
                if (eos || isDraining()) {
                    // if draining, readCount is a drain count
                    if (readCount != EOS) {
                        targetIndex += readCount;
                        targetLength -= readCount;
                    }
                    final int actual = targetIndex - targetIndexIn;
                    return actual > 0 ? actual : EOS;
                }
            }
        }
        // we found a variable start
        // (or the loop ended because target is full; the breaks above only happen while targetLength > 0)
        if (targetLength <= 0) {
            // no more room in target
            return targetLengthIn;
        }
        // SUFFIX
        // buffer more to find a balanced suffix
        final StringMatcher suffixMatcher = stringSubstitutor.getVariableSuffixMatcher();
        final int minReadLenSuffix = Math.max(minReadLenPrefix, suffixMatcher.size());
        readCount = buffer(readCount(minReadLenSuffix, pos));
        if (eos) {
            // EOS
            // input ended right after the variable start: substitute whatever is buffered and drain it
            stringSubstitutor.replaceIn(buffer);
            toDrain = buffer.size();
            final int drainCount = drain(target, targetIndex, targetLength);
            return targetIndex + drainCount - targetIndexIn;
        }
        // buffer and break out when we find the end or a balanced suffix
        while (true) {
            if (isBufferMatchAt(suffixMatcher, pos)) {
                balance--;
                // NOTE(review): pos advances by one here rather than by suffixMatcher.size(), which looks like it
                // assumes a single-character suffix - confirm the behavior with multi-character suffixes.
                pos++;
                if (balance == 0) {
                    break;
                }
            } else if (isBufferMatchAt(prefixMatcher, pos)) {
                // nested variable start: one more suffix is needed to balance
                balance++;
                pos += prefixMatcher.size();
            } else if (isBufferMatchAt(prefixEscapeMatcher, pos)) {
                balance++;
                pos += prefixEscapeMatcher.size();
            } else {
                pos++;
            }
            // keep enough chars buffered past pos to test the next prefix or suffix
            readCount = buffer(readCount(minReadLenSuffix, pos));
            if (readCount == EOS && pos >= buffer.size()) {
                // input ended and everything buffered has been scanned: give up waiting for a suffix
                break;
            }
        }
        // substitute
        final int endPos = pos + 1;
        // chars from pos onwards are kept out of the drain below; remember how many there are
        final int leftover = Math.max(0, buffer.size() - pos);
        stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), endPos));
        // the buffer size may have changed, so recompute pos as everything except the leftover
        pos = buffer.size() - leftover;
        final int drainLen = Math.min(targetLength, pos);
        // only drain up to what we've substituted
        toDrain = pos;
        drain(target, targetIndex, drainLen);
        // chars written earlier in this call plus what was just drained
        return targetIndex - targetIndexIn + drainLen;
    }
305 
306     /**
307      * Returns how many chars to attempt reading to have room in the buffer for {@code count} chars starting at position
308      * {@code pos}.
309      */
310     private int readCount(final int count, final int pos) {
311         final int avail = buffer.size() - pos;
312         return avail >= count ? 0 : count - avail;
313     }
314 
315 }