View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  
32  import org.apache.commons.io.Charsets;
33  import org.apache.commons.io.IOUtils;
34  import org.apache.commons.io.build.AbstractStreamBuilder;
35  import org.apache.commons.io.charset.CharsetEncoders;
36  import org.apache.commons.io.function.Uncheck;
37  
38  /**
39   * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
40   * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
41   * <p>
42   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
43   * </p>
44   * <p>
45   * To build an instance, use {@link Builder}.
46   * </p>
47   *
48   * @see Builder
49   * @since 2.2
50   */
51  public class CharSequenceInputStream extends InputStream {
52  
53      //@formatter:off
54      /**
55       * Builds a new {@link CharSequenceInputStream}.
56       *
57       * <p>
58       * For example:
59       * </p>
60       * <h2>Using a Charset</h2>
61       * <pre>{@code
62       * CharSequenceInputStream s = CharSequenceInputStream.builder()
63       *   .setBufferSize(8192)
64       *   .setCharSequence("String")
65       *   .setCharset(Charset.defaultCharset())
66       *   .get();}
67       * </pre>
68       * <h2>Using a CharsetEncoder</h2>
69       * <pre>{@code
70       * CharSequenceInputStream s = CharSequenceInputStream.builder()
71       *   .setBufferSize(8192)
72       *   .setCharSequence("String")
73       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
74       *     .onMalformedInput(CodingErrorAction.REPLACE)
75       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
76       *   .get();}
77       * </pre>
78       *
79       * @see #get()
80       * @since 2.13.0
81       */
82      //@formatter:on
83      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
84  
85          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
86  
87          /**
88           * Constructs a new builder of {@link CharSequenceInputStream}.
89           */
90          public Builder() {
91              // empty
92          }
93  
94          /**
95           * Builds a new {@link CharSequenceInputStream}.
96           * <p>
97           * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
98           * </p>
99           * <p>
100          * This builder uses the following aspects:
101          * </p>
102          * <ul>
103          * <li>{@link #getCharSequence()} gets the target aspect.</li>
104          * <li>{@link #getBufferSize()}</li>
105          * <li>{@link CharsetEncoder}</li>
106          * </ul>
107          *
108          * @return a new instance.
109          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
110          * @see #getUnchecked()
111          */
112         @Override
113         public CharSequenceInputStream get() {
114             return Uncheck.get(() -> new CharSequenceInputStream(this));
115         }
116 
117         CharsetEncoder getCharsetEncoder() {
118             return charsetEncoder;
119         }
120 
121         @Override
122         public Builder setCharset(final Charset charset) {
123             super.setCharset(charset);
124             charsetEncoder = newEncoder(getCharset());
125             return this;
126         }
127 
128         /**
129          * Sets the charset encoder. Assumes that the caller has configured the encoder.
130          *
131          * @param newEncoder the charset encoder.
132          * @return {@code this} instance.
133          * @since 2.13.0
134          */
135         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
136             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
137             super.setCharset(charsetEncoder.charset());
138             return this;
139         }
140 
141     }
142 
143     private static final int NO_MARK = -1;
144 
145     /**
146      * Constructs a new {@link Builder}.
147      *
148      * @return a new {@link Builder}.
149      * @since 2.12.0
150      */
151     public static Builder builder() {
152         return new Builder();
153     }
154 
155     private static CharsetEncoder newEncoder(final Charset charset) {
156         // @formatter:off
157         return Charsets.toCharset(charset).newEncoder()
158                 .onMalformedInput(CodingErrorAction.REPLACE)
159                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
160         // @formatter:on
161     }
162 
163     private final ByteBuffer bBuf;
164     private int bBufMark; // position in bBuf
165     private final CharBuffer cBuf;
166     private int cBufMark; // position in cBuf
167     private final CharsetEncoder charsetEncoder;
168 
169     private CharSequenceInputStream(final Builder builder) {
170         this.charsetEncoder = builder.charsetEncoder;
171         // Ensure that buffer is long enough to hold a complete character
172         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
173         this.bBuf.flip();
174         this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
175         this.cBufMark = NO_MARK;
176         this.bBufMark = NO_MARK;
177         try {
178             fillBuffer();
179         } catch (final CharacterCodingException ex) {
180             // Reset everything without filling the buffer
181             // so the same exception can be thrown again later.
182             this.bBuf.clear();
183             this.bBuf.flip();
184             this.cBuf.rewind();
185         }
186     }
187 
188     /**
189      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
190      *
191      * @param cs the input character sequence.
192      * @param charset the character set name to use.
193      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
194      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
195      */
196     @Deprecated
197     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
198         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
199     }
200 
201     /**
202      * Constructs a new instance.
203      *
204      * @param cs the input character sequence.
205      * @param charset the character set name to use, null maps to the default Charset.
206      * @param bufferSize the buffer size to use.
207      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
208      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
209      */
210     @Deprecated
211     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
212         this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
213     }
214 
215     /**
216      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
217      *
218      * @param cs the input character sequence.
219      * @param charset the character set name to use.
220      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
221      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
222      */
223     @Deprecated
224     public CharSequenceInputStream(final CharSequence cs, final String charset) {
225         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
226     }
227 
228     /**
229      * Constructs a new instance.
230      *
231      * @param cs the input character sequence.
232      * @param charset the character set name to use, null maps to the default Charset.
233      * @param bufferSize the buffer size to use.
234      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
235      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
236      */
237     @Deprecated
238     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
239         this(cs, Charsets.toCharset(charset), bufferSize);
240     }
241 
242     /**
243      * Gets a lower bound on the number of bytes remaining in the byte stream.
244      *
245      * @return the count of bytes that can be read without blocking (or returning EOF).
246      * @throws IOException if an error occurs (probably not possible).
247      */
248     @Override
249     public int available() throws IOException {
250         return this.bBuf.remaining();
251     }
252 
253     @Override
254     public void close() throws IOException {
255         bBuf.position(bBuf.limit());
256     }
257 
258     /**
259      * Fills the byte output buffer from the input char buffer.
260      *
261      * @throws CharacterCodingException
262      *             an error encoding data.
263      */
264     private void fillBuffer() throws CharacterCodingException {
265         this.bBuf.compact();
266         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
267         if (result.isError()) {
268             result.throwException();
269         }
270         this.bBuf.flip();
271     }
272 
273     /**
274      * Gets the CharsetEncoder.
275      *
276      * @return the CharsetEncoder.
277      */
278     CharsetEncoder getCharsetEncoder() {
279         return charsetEncoder;
280     }
281 
282     /**
283      * {@inheritDoc}
284      * @param readLimit max read limit (ignored).
285      */
286     @Override
287     public synchronized void mark(final int readLimit) {
288         this.cBufMark = this.cBuf.position();
289         this.bBufMark = this.bBuf.position();
290         this.cBuf.mark();
291         this.bBuf.mark();
292         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
293         // however the bBuf is re-used so that won't work
294     }
295 
296     @Override
297     public boolean markSupported() {
298         return true;
299     }
300 
301     @Override
302     public int read() throws IOException {
303         for (;;) {
304             if (this.bBuf.hasRemaining()) {
305                 return this.bBuf.get() & 0xFF;
306             }
307             fillBuffer();
308             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
309                 return EOF;
310             }
311         }
312     }
313 
314     @Override
315     public int read(final byte[] b) throws IOException {
316         return read(b, 0, b.length);
317     }
318 
319     @Override
320     public int read(final byte[] array, int off, int len) throws IOException {
321         IOUtils.checkFromIndexSize(array, off, len);
322         if (len == 0) {
323             return 0; // must return 0 for zero length read
324         }
325         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
326             return EOF;
327         }
328         int bytesRead = 0;
329         while (len > 0) {
330             if (this.bBuf.hasRemaining()) {
331                 final int chunk = Math.min(this.bBuf.remaining(), len);
332                 this.bBuf.get(array, off, chunk);
333                 off += chunk;
334                 len -= chunk;
335                 bytesRead += chunk;
336             } else {
337                 fillBuffer();
338                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
339                     break;
340                 }
341             }
342         }
343         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
344     }
345 
346     @Override
347     public synchronized void reset() throws IOException {
348         //
349         // This is not the most efficient implementation, as it re-encodes from the beginning.
350         //
351         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
352         //
353         // It should be possible to apply some optimizations however:
354         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
355         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
356         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
357         // restart from there.
358         //
359         if (this.cBufMark != NO_MARK) {
360             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
361             if (this.cBuf.position() != 0) {
362                 this.charsetEncoder.reset();
363                 this.cBuf.rewind();
364                 this.bBuf.rewind();
365                 this.bBuf.limit(0); // rewind does not clear the buffer
366                 while (this.cBuf.position() < this.cBufMark) {
367                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
368                     this.bBuf.limit(0);
369                     fillBuffer();
370                 }
371             }
372             if (this.cBuf.position() != this.cBufMark) {
373                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
374                         "expected=" + this.cBufMark);
375             }
376             this.bBuf.position(this.bBufMark);
377             this.cBufMark = NO_MARK;
378             this.bBufMark = NO_MARK;
379         }
380         mark(0);
381     }
382 
383     @Override
384     public long skip(long n) throws IOException {
385         //
386         // This could be made more efficient by using position to skip within the current buffer.
387         //
388         long skipped = 0;
389         while (n > 0 && available() > 0) {
390             this.read();
391             n--;
392             skipped++;
393         }
394         return skipped;
395     }
396 
397 }