View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.util.Objects;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetEncoders;
37  import org.apache.commons.io.function.Uncheck;
38  
39  /**
40   * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
41   * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
42   * <p>
43   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
44   * </p>
45   * <p>
46   * To build an instance, use {@link Builder}.
47   * </p>
48   *
49   * @see Builder
50   * @since 2.2
51   */
52  public class CharSequenceInputStream extends InputStream {
53  
54      //@formatter:off
55      /**
56       * Builds a new {@link CharSequenceInputStream}.
57       *
58       * <p>
59       * For example:
60       * </p>
61       * <h2>Using a Charset</h2>
62       * <pre>{@code
63       * CharSequenceInputStream s = CharSequenceInputStream.builder()
64       *   .setBufferSize(8192)
65       *   .setCharSequence("String")
66       *   .setCharset(Charset.defaultCharset())
67       *   .get();}
68       * </pre>
69       * <h2>Using a CharsetEncoder</h2>
70       * <pre>{@code
71       * CharSequenceInputStream s = CharSequenceInputStream.builder()
72       *   .setBufferSize(8192)
73       *   .setCharSequence("String")
74       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
75       *     .onMalformedInput(CodingErrorAction.REPLACE)
76       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
77       *   .get();}
78       * </pre>
79       *
80       * @see #get()
81       * @since 2.13.0
82       */
83      //@formatter:on
84      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
85  
86          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
87  
88          /**
89           * Constructs a new builder of {@link CharSequenceInputStream}.
90           */
91          public Builder() {
92              // empty
93          }
94  
95          /**
96           * Builds a new {@link CharSequenceInputStream}.
97           * <p>
98           * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
99           * </p>
100          * <p>
101          * This builder uses the following aspects:
102          * </p>
103          * <ul>
104          * <li>{@link #getCharSequence()} gets the target aspect.</li>
105          * <li>{@link #getBufferSize()}</li>
106          * <li>{@link CharsetEncoder}</li>
107          * </ul>
108          *
109          * @return a new instance.
110          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
111          * @see #getUnchecked()
112          */
113         @Override
114         public CharSequenceInputStream get() {
115             return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
116         }
117 
118         CharsetEncoder getCharsetEncoder() {
119             return charsetEncoder;
120         }
121 
122         @Override
123         public Builder setCharset(final Charset charset) {
124             super.setCharset(charset);
125             charsetEncoder = newEncoder(getCharset());
126             return this;
127         }
128 
129         /**
130          * Sets the charset encoder. Assumes that the caller has configured the encoder.
131          *
132          * @param newEncoder the charset encoder.
133          * @return {@code this} instance.
134          * @since 2.13.0
135          */
136         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
137             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
138             super.setCharset(charsetEncoder.charset());
139             return this;
140         }
141 
142     }
143 
144     private static final int NO_MARK = -1;
145 
146     /**
147      * Constructs a new {@link Builder}.
148      *
149      * @return a new {@link Builder}.
150      * @since 2.12.0
151      */
152     public static Builder builder() {
153         return new Builder();
154     }
155 
156     private static CharsetEncoder newEncoder(final Charset charset) {
157         // @formatter:off
158         return Charsets.toCharset(charset).newEncoder()
159                 .onMalformedInput(CodingErrorAction.REPLACE)
160                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
161         // @formatter:on
162     }
163 
164     private final ByteBuffer bBuf;
165     private int bBufMark; // position in bBuf
166     private final CharBuffer cBuf;
167     private int cBufMark; // position in cBuf
168     private final CharsetEncoder charsetEncoder;
169 
170     /**
171      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
172      *
173      * @param cs the input character sequence.
174      * @param charset the character set name to use.
175      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
176      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
177      */
178     @Deprecated
179     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
180         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
181     }
182 
183     /**
184      * Constructs a new instance.
185      *
186      * @param cs the input character sequence.
187      * @param charset the character set name to use, null maps to the default Charset.
188      * @param bufferSize the buffer size to use.
189      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
190      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
191      */
192     @Deprecated
193     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
194         // @formatter:off
195         this(cs, bufferSize, newEncoder(charset));
196         // @formatter:on
197     }
198 
199     private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
200         this.charsetEncoder = charsetEncoder;
201         // Ensure that buffer is long enough to hold a complete character
202         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
203         this.bBuf.flip();
204         this.cBuf = CharBuffer.wrap(cs);
205         this.cBufMark = NO_MARK;
206         this.bBufMark = NO_MARK;
207         try {
208             fillBuffer();
209         } catch (final CharacterCodingException ex) {
210             // Reset everything without filling the buffer
211             // so the same exception can be thrown again later.
212             this.bBuf.clear();
213             this.bBuf.flip();
214             this.cBuf.rewind();
215         }
216     }
217 
218     /**
219      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
220      *
221      * @param cs the input character sequence.
222      * @param charset the character set name to use.
223      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
224      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
225      */
226     @Deprecated
227     public CharSequenceInputStream(final CharSequence cs, final String charset) {
228         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
229     }
230 
231     /**
232      * Constructs a new instance.
233      *
234      * @param cs the input character sequence.
235      * @param charset the character set name to use, null maps to the default Charset.
236      * @param bufferSize the buffer size to use.
237      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
238      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
239      */
240     @Deprecated
241     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
242         this(cs, Charsets.toCharset(charset), bufferSize);
243     }
244 
245     /**
246      * Gets a lower bound on the number of bytes remaining in the byte stream.
247      *
248      * @return the count of bytes that can be read without blocking (or returning EOF).
249      * @throws IOException if an error occurs (probably not possible).
250      */
251     @Override
252     public int available() throws IOException {
253         return this.bBuf.remaining();
254     }
255 
256     @Override
257     public void close() throws IOException {
258         bBuf.position(bBuf.limit());
259     }
260 
261     /**
262      * Fills the byte output buffer from the input char buffer.
263      *
264      * @throws CharacterCodingException
265      *             an error encoding data.
266      */
267     private void fillBuffer() throws CharacterCodingException {
268         this.bBuf.compact();
269         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
270         if (result.isError()) {
271             result.throwException();
272         }
273         this.bBuf.flip();
274     }
275 
276     /**
277      * Gets the CharsetEncoder.
278      *
279      * @return the CharsetEncoder.
280      */
281     CharsetEncoder getCharsetEncoder() {
282         return charsetEncoder;
283     }
284 
285     /**
286      * {@inheritDoc}
287      * @param readLimit max read limit (ignored).
288      */
289     @Override
290     public synchronized void mark(final int readLimit) {
291         this.cBufMark = this.cBuf.position();
292         this.bBufMark = this.bBuf.position();
293         this.cBuf.mark();
294         this.bBuf.mark();
295         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
296         // however the bBuf is re-used so that won't work
297     }
298 
299     @Override
300     public boolean markSupported() {
301         return true;
302     }
303 
304     @Override
305     public int read() throws IOException {
306         for (;;) {
307             if (this.bBuf.hasRemaining()) {
308                 return this.bBuf.get() & 0xFF;
309             }
310             fillBuffer();
311             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
312                 return EOF;
313             }
314         }
315     }
316 
317     @Override
318     public int read(final byte[] b) throws IOException {
319         return read(b, 0, b.length);
320     }
321 
322     @Override
323     public int read(final byte[] array, int off, int len) throws IOException {
324         Objects.requireNonNull(array, "array");
325         if (len < 0 || off + len > array.length) {
326             throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
327         }
328         if (len == 0) {
329             return 0; // must return 0 for zero length read
330         }
331         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
332             return EOF;
333         }
334         int bytesRead = 0;
335         while (len > 0) {
336             if (this.bBuf.hasRemaining()) {
337                 final int chunk = Math.min(this.bBuf.remaining(), len);
338                 this.bBuf.get(array, off, chunk);
339                 off += chunk;
340                 len -= chunk;
341                 bytesRead += chunk;
342             } else {
343                 fillBuffer();
344                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
345                     break;
346                 }
347             }
348         }
349         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
350     }
351 
352     @Override
353     public synchronized void reset() throws IOException {
354         //
355         // This is not the most efficient implementation, as it re-encodes from the beginning.
356         //
357         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
358         //
359         // It should be possible to apply some optimizations however:
360         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
361         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
362         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
363         // restart from there.
364         //
365         if (this.cBufMark != NO_MARK) {
366             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
367             if (this.cBuf.position() != 0) {
368                 this.charsetEncoder.reset();
369                 this.cBuf.rewind();
370                 this.bBuf.rewind();
371                 this.bBuf.limit(0); // rewind does not clear the buffer
372                 while (this.cBuf.position() < this.cBufMark) {
373                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
374                     this.bBuf.limit(0);
375                     fillBuffer();
376                 }
377             }
378             if (this.cBuf.position() != this.cBufMark) {
379                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
380                         "expected=" + this.cBufMark);
381             }
382             this.bBuf.position(this.bBufMark);
383             this.cBufMark = NO_MARK;
384             this.bBufMark = NO_MARK;
385         }
386         mark(0);
387     }
388 
389     @Override
390     public long skip(long n) throws IOException {
391         //
392         // This could be made more efficient by using position to skip within the current buffer.
393         //
394         long skipped = 0;
395         while (n > 0 && available() > 0) {
396             this.read();
397             n--;
398             skipped++;
399         }
400         return skipped;
401     }
402 
403 }