View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.util.Objects;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetEncoders;
37  import org.apache.commons.io.function.Uncheck;
38  
39  /**
40   * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
41   * <p>
42   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
43   * </p>
44   *
45   * @since 2.2
46   */
47  public class CharSequenceInputStream extends InputStream {
48  
49      /**
50       * Builds a new {@link CharSequenceInputStream} instance.
51       * <p>
52       * For example:
53       * </p>
54       * <h2>Using a Charset</h2>
55       * <pre>{@code
56       * CharSequenceInputStream s = CharSequenceInputStream.builder()
57       *   .setBufferSize(8192)
58       *   .setCharSequence("String")
59       *   .setCharset(Charset.defaultCharset())
60       *   .get();}
61       * </pre>
62       * <h2>Using a CharsetEncoder</h2>
63       * <pre>{@code
64       * CharSequenceInputStream s = CharSequenceInputStream.builder()
65       *   .setBufferSize(8192)
66       *   .setCharSequence("String")
67       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
68       *     .onMalformedInput(CodingErrorAction.REPLACE)
69       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
70       *   .get();}
71       * </pre>
72       *
73       * @since 2.13.0
74       */
75      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
76  
77          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
78  
79          /**
80           * Constructs a new instance.
81           * <p>
82           * This builder use the aspects the CharSequence, buffer size, and Charset.
83           * </p>
84           *
85           * @return a new instance.
86           * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
87           */
88          @Override
89          public CharSequenceInputStream get() {
90              return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
91          }
92  
93          CharsetEncoder getCharsetEncoder() {
94              return charsetEncoder;
95          }
96  
97          @Override
98          public Builder setCharset(final Charset charset) {
99              super.setCharset(charset);
100             charsetEncoder = newEncoder(getCharset());
101             return this;
102         }
103 
104         /**
105          * Sets the charset encoder. Assumes that the caller has configured the encoder.
106          *
107          * @param newEncoder the charset encoder.
108          * @return this
109          * @since 2.13.0
110          */
111         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
112             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
113             super.setCharset(charsetEncoder.charset());
114             return this;
115         }
116 
117     }
118 
119     private static final int NO_MARK = -1;
120 
121     /**
122      * Constructs a new {@link Builder}.
123      *
124      * @return a new {@link Builder}.
125      * @since 2.12.0
126      */
127     public static Builder builder() {
128         return new Builder();
129     }
130 
131     private static CharsetEncoder newEncoder(final Charset charset) {
132         // @formatter:off
133         return Charsets.toCharset(charset).newEncoder()
134                 .onMalformedInput(CodingErrorAction.REPLACE)
135                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
136         // @formatter:on
137     }
138 
139     private final ByteBuffer bBuf;
140     private int bBufMark; // position in bBuf
141     private final CharBuffer cBuf;
142     private int cBufMark; // position in cBuf
143     private final CharsetEncoder charsetEncoder;
144 
145     /**
146      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
147      *
148      * @param cs the input character sequence.
149      * @param charset the character set name to use.
150      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
151      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
152      */
153     @Deprecated
154     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
155         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
156     }
157 
158     /**
159      * Constructs a new instance.
160      *
161      * @param cs the input character sequence.
162      * @param charset the character set name to use, null maps to the default Charset.
163      * @param bufferSize the buffer size to use.
164      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
165      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
166      */
167     @Deprecated
168     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
169         // @formatter:off
170         this(cs, bufferSize, newEncoder(charset));
171         // @formatter:on
172     }
173 
174     private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
175         this.charsetEncoder = charsetEncoder;
176         // Ensure that buffer is long enough to hold a complete character
177         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
178         this.bBuf.flip();
179         this.cBuf = CharBuffer.wrap(cs);
180         this.cBufMark = NO_MARK;
181         this.bBufMark = NO_MARK;
182     }
183 
184     /**
185      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
186      *
187      * @param cs the input character sequence.
188      * @param charset the character set name to use.
189      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
190      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
191      */
192     @Deprecated
193     public CharSequenceInputStream(final CharSequence cs, final String charset) {
194         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
195     }
196 
197     /**
198      * Constructs a new instance.
199      *
200      * @param cs the input character sequence.
201      * @param charset the character set name to use, null maps to the default Charset.
202      * @param bufferSize the buffer size to use.
203      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
204      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
205      */
206     @Deprecated
207     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
208         this(cs, Charsets.toCharset(charset), bufferSize);
209     }
210 
211     /**
212      * Return an estimate of the number of bytes remaining in the byte stream.
213      * @return the count of bytes that can be read without blocking (or returning EOF).
214      *
215      * @throws IOException if an error occurs (probably not possible).
216      */
217     @Override
218     public int available() throws IOException {
219         // The cached entries are in bBuf; since encoding always creates at least one byte
220         // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
221         // Note that the implementation in 2.4 could return zero even though there were
222         // encoded bytes still available.
223         return this.bBuf.remaining() + this.cBuf.remaining();
224     }
225 
226     @Override
227     public void close() throws IOException {
228         // noop
229     }
230 
231     /**
232      * Fills the byte output buffer from the input char buffer.
233      *
234      * @throws CharacterCodingException
235      *             an error encoding data.
236      */
237     private void fillBuffer() throws CharacterCodingException {
238         this.bBuf.compact();
239         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
240         if (result.isError()) {
241             result.throwException();
242         }
243         this.bBuf.flip();
244     }
245 
246     /**
247      * Gets the CharsetEncoder.
248      *
249      * @return the CharsetEncoder.
250      */
251     CharsetEncoder getCharsetEncoder() {
252         return charsetEncoder;
253     }
254 
255     /**
256      * {@inheritDoc}
257      * @param readLimit max read limit (ignored).
258      */
259     @Override
260     public synchronized void mark(final int readLimit) {
261         this.cBufMark = this.cBuf.position();
262         this.bBufMark = this.bBuf.position();
263         this.cBuf.mark();
264         this.bBuf.mark();
265         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
266         // however the bBuf is re-used so that won't work
267     }
268 
269     @Override
270     public boolean markSupported() {
271         return true;
272     }
273 
274     @Override
275     public int read() throws IOException {
276         for (;;) {
277             if (this.bBuf.hasRemaining()) {
278                 return this.bBuf.get() & 0xFF;
279             }
280             fillBuffer();
281             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
282                 return EOF;
283             }
284         }
285     }
286 
287     @Override
288     public int read(final byte[] b) throws IOException {
289         return read(b, 0, b.length);
290     }
291 
292     @Override
293     public int read(final byte[] array, int off, int len) throws IOException {
294         Objects.requireNonNull(array, "array");
295         if (len < 0 || off + len > array.length) {
296             throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
297         }
298         if (len == 0) {
299             return 0; // must return 0 for zero length read
300         }
301         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
302             return EOF;
303         }
304         int bytesRead = 0;
305         while (len > 0) {
306             if (this.bBuf.hasRemaining()) {
307                 final int chunk = Math.min(this.bBuf.remaining(), len);
308                 this.bBuf.get(array, off, chunk);
309                 off += chunk;
310                 len -= chunk;
311                 bytesRead += chunk;
312             } else {
313                 fillBuffer();
314                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
315                     break;
316                 }
317             }
318         }
319         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
320     }
321 
322     @Override
323     public synchronized void reset() throws IOException {
324         //
325         // This is not the most efficient implementation, as it re-encodes from the beginning.
326         //
327         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
328         //
329         // It should be possible to apply some optimizations however:
330         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
331         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
332         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
333         // restart from there.
334         //
335         if (this.cBufMark != NO_MARK) {
336             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
337             if (this.cBuf.position() != 0) {
338                 this.charsetEncoder.reset();
339                 this.cBuf.rewind();
340                 this.bBuf.rewind();
341                 this.bBuf.limit(0); // rewind does not clear the buffer
342                 while (this.cBuf.position() < this.cBufMark) {
343                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
344                     this.bBuf.limit(0);
345                     fillBuffer();
346                 }
347             }
348             if (this.cBuf.position() != this.cBufMark) {
349                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
350                         "expected=" + this.cBufMark);
351             }
352             this.bBuf.position(this.bBufMark);
353             this.cBufMark = NO_MARK;
354             this.bBufMark = NO_MARK;
355         }
356     }
357 
358     @Override
359     public long skip(long n) throws IOException {
360         //
361         // This could be made more efficient by using position to skip within the current buffer.
362         //
363         long skipped = 0;
364         while (n > 0 && available() > 0) {
365             this.read();
366             n--;
367             skipped++;
368         }
369         return skipped;
370     }
371 
372 }