View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  
32  import org.apache.commons.io.Charsets;
33  import org.apache.commons.io.IOUtils;
34  import org.apache.commons.io.build.AbstractStreamBuilder;
35  import org.apache.commons.io.charset.CharsetEncoders;
36  import org.apache.commons.io.function.Uncheck;
37  
38  /**
39   * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
40   * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
41   * <p>
42   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
43   * </p>
44   * <p>
45   * To build an instance, use {@link Builder}.
46   * </p>
47   *
48   * @see Builder
49   * @since 2.2
50   */
51  public class CharSequenceInputStream extends InputStream {
52  
53      //@formatter:off
54      /**
55       * Builds a new {@link CharSequenceInputStream}.
56       *
57       * <p>
58       * For example:
59       * </p>
60       * <h2>Using a Charset</h2>
61       * <pre>{@code
62       * CharSequenceInputStream s = CharSequenceInputStream.builder()
63       *   .setBufferSize(8192)
64       *   .setCharSequence("String")
65       *   .setCharset(Charset.defaultCharset())
66       *   .get();}
67       * </pre>
68       * <h2>Using a CharsetEncoder</h2>
69       * <pre>{@code
70       * CharSequenceInputStream s = CharSequenceInputStream.builder()
71       *   .setBufferSize(8192)
72       *   .setCharSequence("String")
73       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
74       *     .onMalformedInput(CodingErrorAction.REPLACE)
75       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
76       *   .get();}
77       * </pre>
78       *
79       * @see #get()
80       * @since 2.13.0
81       */
82      //@formatter:on
83      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
84  
85          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
86  
87          /**
88           * Constructs a new builder of {@link CharSequenceInputStream}.
89           */
90          public Builder() {
91              // empty
92          }
93  
94          /**
95           * Builds a new {@link CharSequenceInputStream}.
96           * <p>
97           * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
98           * </p>
99           * <p>
100          * This builder uses the following aspects:
101          * </p>
102          * <ul>
103          * <li>{@link #getCharSequence()} gets the target aspect.</li>
104          * <li>{@link #getBufferSize()}</li>
105          * <li>{@link CharsetEncoder}</li>
106          * </ul>
107          *
108          * @return a new instance.
109          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
110          * @see #getUnchecked()
111          */
112         @Override
113         public CharSequenceInputStream get() {
114             return Uncheck.get(() -> new CharSequenceInputStream(this));
115         }
116 
117         CharsetEncoder getCharsetEncoder() {
118             return charsetEncoder;
119         }
120 
121         @Override
122         public Builder setCharset(final Charset charset) {
123             super.setCharset(charset);
124             charsetEncoder = newEncoder(getCharset());
125             return this;
126         }
127 
128         /**
129          * Sets the charset encoder. Assumes that the caller has configured the encoder.
130          *
131          * @param newEncoder the charset encoder.
132          * @return {@code this} instance.
133          * @since 2.13.0
134          */
135         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
136             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
137             super.setCharset(charsetEncoder.charset());
138             return this;
139         }
140 
141     }
142 
143     private static final int NO_MARK = -1;
144 
145     /**
146      * Constructs a new {@link Builder}.
147      *
148      * @return a new {@link Builder}.
149      * @since 2.12.0
150      */
151     public static Builder builder() {
152         return new Builder();
153     }
154 
155     private static CharsetEncoder newEncoder(final Charset charset) {
156         // @formatter:off
157         return Charsets.toCharset(charset).newEncoder()
158                 .onMalformedInput(CodingErrorAction.REPLACE)
159                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
160         // @formatter:on
161     }
162 
163     private final ByteBuffer bBuf;
164     private int bBufMark; // position in bBuf
165     private final CharBuffer cBuf;
166     private int cBufMark; // position in cBuf
167     private final CharsetEncoder charsetEncoder;
168 
169     private CharSequenceInputStream(final Builder builder) {
170         this.charsetEncoder = builder.charsetEncoder;
171         // Ensure that buffer is long enough to hold a complete character
172         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
173         this.bBuf.flip();
174         this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
175         this.cBufMark = NO_MARK;
176         this.bBufMark = NO_MARK;
177         try {
178             fillBuffer();
179         } catch (final CharacterCodingException ex) {
180             // Reset everything without filling the buffer
181             // so the same exception can be thrown again later.
182             this.bBuf.clear();
183             this.bBuf.flip();
184             this.cBuf.rewind();
185         }
186     }
187 
188     /**
189      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
190      *
191      * @param cs the input character sequence.
192      * @param charset the character set name to use.
193      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
194      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
195      */
196     @Deprecated
197     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
198         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
199     }
200 
201     /**
202      * Constructs a new instance.
203      *
204      * @param cs the input character sequence.
205      * @param charset the character set name to use, null maps to the default Charset.
206      * @param bufferSize the buffer size to use.
207      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
208      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
209      */
210     @Deprecated
211     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
212         this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
213     }
214 
215     /**
216      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
217      *
218      * @param cs the input character sequence.
219      * @param charset the character set name to use.
220      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
221      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
222      */
223     @Deprecated
224     public CharSequenceInputStream(final CharSequence cs, final String charset) {
225         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
226     }
227 
228     /**
229      * Constructs a new instance.
230      *
231      * @param cs the input character sequence.
232      * @param charset the character set name to use, null maps to the default Charset.
233      * @param bufferSize the buffer size to use.
234      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
235      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
236      */
237     @Deprecated
238     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
239         this(cs, Charsets.toCharset(charset), bufferSize);
240     }
241 
242     /**
243      * Gets a lower bound on the number of bytes remaining in the byte stream.
244      *
245      * @return the count of bytes that can be read without blocking (or returning EOF).
246      * @throws IOException if an error occurs (probably not possible).
247      */
248     @Override
249     public int available() throws IOException {
250         return this.bBuf.remaining();
251     }
252 
253     @Override
254     public void close() throws IOException {
255         bBuf.position(bBuf.limit());
256     }
257 
258     /**
259      * Fills the byte output buffer from the input char buffer.
260      *
261      * @throws CharacterCodingException
262      *             an error encoding data.
263      */
264     private void fillBuffer() throws CharacterCodingException {
265         this.bBuf.compact();
266         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
267         if (result.isError()) {
268             result.throwException();
269         }
270         this.bBuf.flip();
271     }
272 
273     /**
274      * Gets the CharsetEncoder.
275      *
276      * @return the CharsetEncoder.
277      */
278     CharsetEncoder getCharsetEncoder() {
279         return charsetEncoder;
280     }
281 
282     /**
283      * {@inheritDoc}
284      *
285      * @param readLimit max read limit (ignored).
286      */
287     @Override
288     public synchronized void mark(final int readLimit) {
289         this.cBufMark = this.cBuf.position();
290         this.bBufMark = this.bBuf.position();
291         this.cBuf.mark();
292         this.bBuf.mark();
293         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
294         // however the bBuf is re-used so that won't work
295     }
296 
297     @Override
298     public boolean markSupported() {
299         return true;
300     }
301 
302     @Override
303     public int read() throws IOException {
304         for (;;) {
305             if (this.bBuf.hasRemaining()) {
306                 return this.bBuf.get() & 0xFF;
307             }
308             fillBuffer();
309             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
310                 return EOF;
311             }
312         }
313     }
314 
315     @Override
316     public int read(final byte[] b) throws IOException {
317         return read(b, 0, b.length);
318     }
319 
320     @Override
321     public int read(final byte[] array, int off, int len) throws IOException {
322         IOUtils.checkFromIndexSize(array, off, len);
323         if (len == 0) {
324             return 0; // must return 0 for zero length read
325         }
326         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
327             return EOF;
328         }
329         int bytesRead = 0;
330         while (len > 0) {
331             if (this.bBuf.hasRemaining()) {
332                 final int chunk = Math.min(this.bBuf.remaining(), len);
333                 this.bBuf.get(array, off, chunk);
334                 off += chunk;
335                 len -= chunk;
336                 bytesRead += chunk;
337             } else {
338                 fillBuffer();
339                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
340                     break;
341                 }
342             }
343         }
344         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
345     }
346 
347     @Override
348     public synchronized void reset() throws IOException {
349         //
350         // This is not the most efficient implementation, as it re-encodes from the beginning.
351         //
352         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
353         //
354         // It should be possible to apply some optimizations however:
355         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
356         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
357         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
358         // restart from there.
359         //
360         if (this.cBufMark != NO_MARK) {
361             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
362             if (this.cBuf.position() != 0) {
363                 this.charsetEncoder.reset();
364                 this.cBuf.rewind();
365                 this.bBuf.rewind();
366                 this.bBuf.limit(0); // rewind does not clear the buffer
367                 while (this.cBuf.position() < this.cBufMark) {
368                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
369                     this.bBuf.limit(0);
370                     fillBuffer();
371                 }
372             }
373             if (this.cBuf.position() != this.cBufMark) {
374                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
375                         "expected=" + this.cBufMark);
376             }
377             this.bBuf.position(this.bBufMark);
378             this.cBufMark = NO_MARK;
379             this.bBufMark = NO_MARK;
380         }
381         mark(0);
382     }
383 
384     @Override
385     public long skip(long n) throws IOException {
386         //
387         // This could be made more efficient by using position to skip within the current buffer.
388         //
389         long skipped = 0;
390         while (n > 0 && available() > 0) {
391             this.read();
392             n--;
393             skipped++;
394         }
395         return skipped;
396     }
397 
398 }