/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.io.input;
19  
20  import static org.apache.commons.io.IOUtils.EOF;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.CharacterCodingException;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetEncoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.util.Objects;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetEncoders;
37  import org.apache.commons.io.function.Uncheck;
38  
39  /**
40   * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
41   * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
42   * <p>
43   * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
44   * </p>
45   * <p>
46   * To build an instance, use {@link Builder}.
47   * </p>
48   *
49   * @see Builder
50   * @since 2.2
51   */
52  public class CharSequenceInputStream extends InputStream {
53  
54      //@formatter:off
55      /**
56       * Builds a new {@link CharSequenceInputStream}.
57       *
58       * <p>
59       * For example:
60       * </p>
61       * <h2>Using a Charset</h2>
62       * <pre>{@code
63       * CharSequenceInputStream s = CharSequenceInputStream.builder()
64       *   .setBufferSize(8192)
65       *   .setCharSequence("String")
66       *   .setCharset(Charset.defaultCharset())
67       *   .get();}
68       * </pre>
69       * <h2>Using a CharsetEncoder</h2>
70       * <pre>{@code
71       * CharSequenceInputStream s = CharSequenceInputStream.builder()
72       *   .setBufferSize(8192)
73       *   .setCharSequence("String")
74       *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
75       *     .onMalformedInput(CodingErrorAction.REPLACE)
76       *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
77       *   .get();}
78       * </pre>
79       *
80       * @see #get()
81       * @since 2.13.0
82       */
83      //@formatter:on
84      public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
85  
86          private CharsetEncoder charsetEncoder = newEncoder(getCharset());
87  
88          /**
89           * Constructs a new builder of {@link CharSequenceInputStream}.
90           */
91          public Builder() {
92              // empty
93          }
94  
95          /**
96           * Builds a new {@link CharSequenceInputStream}.
97           * <p>
98           * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
99           * </p>
100          * <p>
101          * This builder uses the following aspects:
102          * </p>
103          * <ul>
104          * <li>{@link #getCharSequence()} gets the target aspect.</li>
105          * <li>{@link #getBufferSize()}</li>
106          * <li>{@link CharsetEncoder}</li>
107          * </ul>
108          *
109          * @return a new instance.
110          * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
111          * @see #getUnchecked()
112          */
113         @Override
114         public CharSequenceInputStream get() {
115             return Uncheck.get(() -> new CharSequenceInputStream(this));
116         }
117 
118         CharsetEncoder getCharsetEncoder() {
119             return charsetEncoder;
120         }
121 
122         @Override
123         public Builder setCharset(final Charset charset) {
124             super.setCharset(charset);
125             charsetEncoder = newEncoder(getCharset());
126             return this;
127         }
128 
129         /**
130          * Sets the charset encoder. Assumes that the caller has configured the encoder.
131          *
132          * @param newEncoder the charset encoder.
133          * @return {@code this} instance.
134          * @since 2.13.0
135          */
136         public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
137             charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
138             super.setCharset(charsetEncoder.charset());
139             return this;
140         }
141 
142     }
143 
144     private static final int NO_MARK = -1;
145 
146     /**
147      * Constructs a new {@link Builder}.
148      *
149      * @return a new {@link Builder}.
150      * @since 2.12.0
151      */
152     public static Builder builder() {
153         return new Builder();
154     }
155 
156     private static CharsetEncoder newEncoder(final Charset charset) {
157         // @formatter:off
158         return Charsets.toCharset(charset).newEncoder()
159                 .onMalformedInput(CodingErrorAction.REPLACE)
160                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
161         // @formatter:on
162     }
163 
164     private final ByteBuffer bBuf;
165     private int bBufMark; // position in bBuf
166     private final CharBuffer cBuf;
167     private int cBufMark; // position in cBuf
168     private final CharsetEncoder charsetEncoder;
169 
170     private CharSequenceInputStream(final Builder builder) {
171         this.charsetEncoder = builder.charsetEncoder;
172         // Ensure that buffer is long enough to hold a complete character
173         this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
174         this.bBuf.flip();
175         this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
176         this.cBufMark = NO_MARK;
177         this.bBufMark = NO_MARK;
178         try {
179             fillBuffer();
180         } catch (final CharacterCodingException ex) {
181             // Reset everything without filling the buffer
182             // so the same exception can be thrown again later.
183             this.bBuf.clear();
184             this.bBuf.flip();
185             this.cBuf.rewind();
186         }
187     }
188 
189     /**
190      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
191      *
192      * @param cs the input character sequence.
193      * @param charset the character set name to use.
194      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
195      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
196      */
197     @Deprecated
198     public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
199         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
200     }
201 
202     /**
203      * Constructs a new instance.
204      *
205      * @param cs the input character sequence.
206      * @param charset the character set name to use, null maps to the default Charset.
207      * @param bufferSize the buffer size to use.
208      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
209      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
210      */
211     @Deprecated
212     public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
213         this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
214     }
215 
216     /**
217      * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
218      *
219      * @param cs the input character sequence.
220      * @param charset the character set name to use.
221      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
222      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
223      */
224     @Deprecated
225     public CharSequenceInputStream(final CharSequence cs, final String charset) {
226         this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
227     }
228 
229     /**
230      * Constructs a new instance.
231      *
232      * @param cs the input character sequence.
233      * @param charset the character set name to use, null maps to the default Charset.
234      * @param bufferSize the buffer size to use.
235      * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
236      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
237      */
238     @Deprecated
239     public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
240         this(cs, Charsets.toCharset(charset), bufferSize);
241     }
242 
243     /**
244      * Gets a lower bound on the number of bytes remaining in the byte stream.
245      *
246      * @return the count of bytes that can be read without blocking (or returning EOF).
247      * @throws IOException if an error occurs (probably not possible).
248      */
249     @Override
250     public int available() throws IOException {
251         return this.bBuf.remaining();
252     }
253 
254     @Override
255     public void close() throws IOException {
256         bBuf.position(bBuf.limit());
257     }
258 
259     /**
260      * Fills the byte output buffer from the input char buffer.
261      *
262      * @throws CharacterCodingException
263      *             an error encoding data.
264      */
265     private void fillBuffer() throws CharacterCodingException {
266         this.bBuf.compact();
267         final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
268         if (result.isError()) {
269             result.throwException();
270         }
271         this.bBuf.flip();
272     }
273 
274     /**
275      * Gets the CharsetEncoder.
276      *
277      * @return the CharsetEncoder.
278      */
279     CharsetEncoder getCharsetEncoder() {
280         return charsetEncoder;
281     }
282 
283     /**
284      * {@inheritDoc}
285      * @param readLimit max read limit (ignored).
286      */
287     @Override
288     public synchronized void mark(final int readLimit) {
289         this.cBufMark = this.cBuf.position();
290         this.bBufMark = this.bBuf.position();
291         this.cBuf.mark();
292         this.bBuf.mark();
293         // It would be nice to be able to use mark & reset on the cBuf and bBuf;
294         // however the bBuf is re-used so that won't work
295     }
296 
297     @Override
298     public boolean markSupported() {
299         return true;
300     }
301 
302     @Override
303     public int read() throws IOException {
304         for (;;) {
305             if (this.bBuf.hasRemaining()) {
306                 return this.bBuf.get() & 0xFF;
307             }
308             fillBuffer();
309             if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
310                 return EOF;
311             }
312         }
313     }
314 
315     @Override
316     public int read(final byte[] b) throws IOException {
317         return read(b, 0, b.length);
318     }
319 
320     @Override
321     public int read(final byte[] array, int off, int len) throws IOException {
322         Objects.requireNonNull(array, "array");
323         if (len < 0 || off + len > array.length) {
324             throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
325         }
326         if (len == 0) {
327             return 0; // must return 0 for zero length read
328         }
329         if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
330             return EOF;
331         }
332         int bytesRead = 0;
333         while (len > 0) {
334             if (this.bBuf.hasRemaining()) {
335                 final int chunk = Math.min(this.bBuf.remaining(), len);
336                 this.bBuf.get(array, off, chunk);
337                 off += chunk;
338                 len -= chunk;
339                 bytesRead += chunk;
340             } else {
341                 fillBuffer();
342                 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
343                     break;
344                 }
345             }
346         }
347         return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
348     }
349 
350     @Override
351     public synchronized void reset() throws IOException {
352         //
353         // This is not the most efficient implementation, as it re-encodes from the beginning.
354         //
355         // Since the bBuf is re-used, in general it's necessary to re-encode the data.
356         //
357         // It should be possible to apply some optimizations however:
358         // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
359         // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
360         // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
361         // restart from there.
362         //
363         if (this.cBufMark != NO_MARK) {
364             // if cBuf is at 0, we have not started reading anything, so skip re-encoding
365             if (this.cBuf.position() != 0) {
366                 this.charsetEncoder.reset();
367                 this.cBuf.rewind();
368                 this.bBuf.rewind();
369                 this.bBuf.limit(0); // rewind does not clear the buffer
370                 while (this.cBuf.position() < this.cBufMark) {
371                     this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
372                     this.bBuf.limit(0);
373                     fillBuffer();
374                 }
375             }
376             if (this.cBuf.position() != this.cBufMark) {
377                 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
378                         "expected=" + this.cBufMark);
379             }
380             this.bBuf.position(this.bBufMark);
381             this.cBufMark = NO_MARK;
382             this.bBufMark = NO_MARK;
383         }
384         mark(0);
385     }
386 
387     @Override
388     public long skip(long n) throws IOException {
389         //
390         // This could be made more efficient by using position to skip within the current buffer.
391         //
392         long skipped = 0;
393         while (n > 0 && available() > 0) {
394             this.read();
395             n--;
396             skipped++;
397         }
398         return skipped;
399     }
400 
401 }