View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.util.Objects;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetEncoders;
37  import org.apache.commons.io.function.Uncheck;
38  
39  /**
40   * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
41   * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
42   * <p>
43   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
44   * </p>
45   * <p>
46   * To build an instance, use {@link Builder}.
47   * </p>
48   *
49   * @see Builder
50   * @since 2.2
51   */
52  public class CharSequenceInputStream extends InputStream {
53  
54      //@formatter:off
55      /**
56       * Builds a new {@link CharSequenceInputStream}.
57       *
58       * <p>
59       * For example:
60       * </p>
61       * <h2>Using a Charset</h2>
62       * <pre>{@code
63       * CharSequenceInputStream s = CharSequenceInputStream.builder()
64       *   .setBufferSize(8192)
65       *   .setCharSequence("String")
66       *   .setCharset(Charset.defaultCharset())
67       *   .get();}
68       * </pre>
69       * <h2>Using a CharsetEncoder</h2>
70       * <pre>{@code
71       * CharSequenceInputStream s = CharSequenceInputStream.builder()
72       *   .setBufferSize(8192)
73       *   .setCharSequence("String")
74       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
75       *     .onMalformedInput(CodingErrorAction.REPLACE)
76       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
77       *   .get();}
78       * </pre>
79       *
80       * @see #get()
81       * @since 2.13.0
82       */
83      //@formatter:on
84      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
85  
86          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
87  
88          /**
89           * Builds a new {@link CharSequenceInputStream}.
90           * <p>
91           * You must set input that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
92           * </p>
93           * <p>
94           * This builder use the following aspects:
95           * </p>
96           * <ul>
97           * <li>{@link #getCharSequence()}</li>
98           * <li>{@link #getBufferSize()}</li>
99           * <li>{@link CharsetEncoder}</li>
100          * </ul>
101          *
102          * @return a new instance.
103          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
104          */
105         @Override
106         public CharSequenceInputStream get() {
107             return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
108         }
109 
110         CharsetEncoder getCharsetEncoder() {
111             return charsetEncoder;
112         }
113 
114         @Override
115         public Builder setCharset(final Charset charset) {
116             super.setCharset(charset);
117             charsetEncoder = newEncoder(getCharset());
118             return this;
119         }
120 
121         /**
122          * Sets the charset encoder. Assumes that the caller has configured the encoder.
123          *
124          * @param newEncoder the charset encoder.
125          * @return this
126          * @since 2.13.0
127          */
128         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
129             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
130             super.setCharset(charsetEncoder.charset());
131             return this;
132         }
133 
134     }
135 
136     private static final int NO_MARK = -1;
137 
138     /**
139      * Constructs a new {@link Builder}.
140      *
141      * @return a new {@link Builder}.
142      * @since 2.12.0
143      */
144     public static Builder builder() {
145         return new Builder();
146     }
147 
148     private static CharsetEncoder newEncoder(final Charset charset) {
149         // @formatter:off
150         return Charsets.toCharset(charset).newEncoder()
151                 .onMalformedInput(CodingErrorAction.REPLACE)
152                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
153         // @formatter:on
154     }
155 
156     private final ByteBuffer bBuf;
157     private int bBufMark; // position in bBuf
158     private final CharBuffer cBuf;
159     private int cBufMark; // position in cBuf
160     private final CharsetEncoder charsetEncoder;
161 
162     /**
163      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
164      *
165      * @param cs the input character sequence.
166      * @param charset the character set name to use.
167      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
168      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
169      */
170     @Deprecated
171     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
172         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
173     }
174 
175     /**
176      * Constructs a new instance.
177      *
178      * @param cs the input character sequence.
179      * @param charset the character set name to use, null maps to the default Charset.
180      * @param bufferSize the buffer size to use.
181      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
182      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
183      */
184     @Deprecated
185     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
186         // @formatter:off
187         this(cs, bufferSize, newEncoder(charset));
188         // @formatter:on
189     }
190 
191     private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
192         this.charsetEncoder = charsetEncoder;
193         // Ensure that buffer is long enough to hold a complete character
194         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
195         this.bBuf.flip();
196         this.cBuf = CharBuffer.wrap(cs);
197         this.cBufMark = NO_MARK;
198         this.bBufMark = NO_MARK;
199         try {
200             fillBuffer();
201         } catch (final CharacterCodingException ex) {
202             // Reset everything without filling the buffer
203             // so the same exception can be thrown again later.
204             this.bBuf.clear();
205             this.bBuf.flip();
206             this.cBuf.rewind();
207         }
208     }
209 
210     /**
211      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
212      *
213      * @param cs the input character sequence.
214      * @param charset the character set name to use.
215      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
216      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
217      */
218     @Deprecated
219     public CharSequenceInputStream(final CharSequence cs, final String charset) {
220         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
221     }
222 
223     /**
224      * Constructs a new instance.
225      *
226      * @param cs the input character sequence.
227      * @param charset the character set name to use, null maps to the default Charset.
228      * @param bufferSize the buffer size to use.
229      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
230      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
231      */
232     @Deprecated
233     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
234         this(cs, Charsets.toCharset(charset), bufferSize);
235     }
236 
237     /**
238      * Gets a lower bound on the number of bytes remaining in the byte stream.
239      *
240      * @return the count of bytes that can be read without blocking (or returning EOF).
241      * @throws IOException if an error occurs (probably not possible).
242      */
243     @Override
244     public int available() throws IOException {
245         return this.bBuf.remaining();
246     }
247 
248     @Override
249     public void close() throws IOException {
250         // noop
251     }
252 
253     /**
254      * Fills the byte output buffer from the input char buffer.
255      *
256      * @throws CharacterCodingException
257      *             an error encoding data.
258      */
259     private void fillBuffer() throws CharacterCodingException {
260         this.bBuf.compact();
261         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
262         if (result.isError()) {
263             result.throwException();
264         }
265         this.bBuf.flip();
266     }
267 
268     /**
269      * Gets the CharsetEncoder.
270      *
271      * @return the CharsetEncoder.
272      */
273     CharsetEncoder getCharsetEncoder() {
274         return charsetEncoder;
275     }
276 
277     /**
278      * {@inheritDoc}
279      * @param readLimit max read limit (ignored).
280      */
281     @Override
282     public synchronized void mark(final int readLimit) {
283         this.cBufMark = this.cBuf.position();
284         this.bBufMark = this.bBuf.position();
285         this.cBuf.mark();
286         this.bBuf.mark();
287         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
288         // however the bBuf is re-used so that won't work
289     }
290 
291     @Override
292     public boolean markSupported() {
293         return true;
294     }
295 
296     @Override
297     public int read() throws IOException {
298         for (;;) {
299             if (this.bBuf.hasRemaining()) {
300                 return this.bBuf.get() & 0xFF;
301             }
302             fillBuffer();
303             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
304                 return EOF;
305             }
306         }
307     }
308 
309     @Override
310     public int read(final byte[] b) throws IOException {
311         return read(b, 0, b.length);
312     }
313 
314     @Override
315     public int read(final byte[] array, int off, int len) throws IOException {
316         Objects.requireNonNull(array, "array");
317         if (len < 0 || off + len > array.length) {
318             throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
319         }
320         if (len == 0) {
321             return 0; // must return 0 for zero length read
322         }
323         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
324             return EOF;
325         }
326         int bytesRead = 0;
327         while (len > 0) {
328             if (this.bBuf.hasRemaining()) {
329                 final int chunk = Math.min(this.bBuf.remaining(), len);
330                 this.bBuf.get(array, off, chunk);
331                 off += chunk;
332                 len -= chunk;
333                 bytesRead += chunk;
334             } else {
335                 fillBuffer();
336                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
337                     break;
338                 }
339             }
340         }
341         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
342     }
343 
344     @Override
345     public synchronized void reset() throws IOException {
346         //
347         // This is not the most efficient implementation, as it re-encodes from the beginning.
348         //
349         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
350         //
351         // It should be possible to apply some optimizations however:
352         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
353         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
354         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
355         // restart from there.
356         //
357         if (this.cBufMark != NO_MARK) {
358             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
359             if (this.cBuf.position() != 0) {
360                 this.charsetEncoder.reset();
361                 this.cBuf.rewind();
362                 this.bBuf.rewind();
363                 this.bBuf.limit(0); // rewind does not clear the buffer
364                 while (this.cBuf.position() < this.cBufMark) {
365                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
366                     this.bBuf.limit(0);
367                     fillBuffer();
368                 }
369             }
370             if (this.cBuf.position() != this.cBufMark) {
371                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
372                         "expected=" + this.cBufMark);
373             }
374             this.bBuf.position(this.bBufMark);
375             this.cBufMark = NO_MARK;
376             this.bBufMark = NO_MARK;
377         }
378         mark(0);
379     }
380 
381     @Override
382     public long skip(long n) throws IOException {
383         //
384         // This could be made more efficient by using position to skip within the current buffer.
385         //
386         long skipped = 0;
387         while (n > 0 && available() > 0) {
388             this.read();
389             n--;
390             skipped++;
391         }
392         return skipped;
393     }
394 
395 }