1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.io.input;
19
20 import static org.apache.commons.io.IOUtils.EOF;
21
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.nio.ByteBuffer;
25 import java.nio.CharBuffer;
26 import java.nio.charset.CharacterCodingException;
27 import java.nio.charset.Charset;
28 import java.nio.charset.CharsetEncoder;
29 import java.nio.charset.CoderResult;
30 import java.nio.charset.CodingErrorAction;
31
32 import org.apache.commons.io.Charsets;
33 import org.apache.commons.io.IOUtils;
34 import org.apache.commons.io.build.AbstractStreamBuilder;
35 import org.apache.commons.io.charset.CharsetEncoders;
36 import org.apache.commons.io.function.Uncheck;
37
38 /**
39 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
40 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
41 * <p>
42 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
43 * </p>
44 * <p>
45 * To build an instance, use {@link Builder}.
46 * </p>
47 *
48 * @see Builder
49 * @since 2.2
50 */
51 public class CharSequenceInputStream extends InputStream {
52
53 //@formatter:off
54 /**
55 * Builds a new {@link CharSequenceInputStream}.
56 *
57 * <p>
58 * For example:
59 * </p>
60 * <h2>Using a Charset</h2>
61 * <pre>{@code
62 * CharSequenceInputStream s = CharSequenceInputStream.builder()
63 * .setBufferSize(8192)
64 * .setCharSequence("String")
65 * .setCharset(Charset.defaultCharset())
66 * .get();}
67 * </pre>
68 * <h2>Using a CharsetEncoder</h2>
69 * <pre>{@code
70 * CharSequenceInputStream s = CharSequenceInputStream.builder()
71 * .setBufferSize(8192)
72 * .setCharSequence("String")
73 * .setCharsetEncoder(Charset.defaultCharset().newEncoder()
74 * .onMalformedInput(CodingErrorAction.REPLACE)
75 * .onUnmappableCharacter(CodingErrorAction.REPLACE))
76 * .get();}
77 * </pre>
78 *
79 * @see #get()
80 * @since 2.13.0
81 */
82 //@formatter:on
83 public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
84
85 private CharsetEncoder charsetEncoder = newEncoder(getCharset());
86
87 /**
88 * Constructs a new builder of {@link CharSequenceInputStream}.
89 */
90 public Builder() {
91 // empty
92 }
93
94 /**
95 * Builds a new {@link CharSequenceInputStream}.
96 * <p>
97 * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
98 * </p>
99 * <p>
100 * This builder uses the following aspects:
101 * </p>
102 * <ul>
103 * <li>{@link #getCharSequence()} gets the target aspect.</li>
104 * <li>{@link #getBufferSize()}</li>
105 * <li>{@link CharsetEncoder}</li>
106 * </ul>
107 *
108 * @return a new instance.
109 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
110 * @see #getUnchecked()
111 */
112 @Override
113 public CharSequenceInputStream get() {
114 return Uncheck.get(() -> new CharSequenceInputStream(this));
115 }
116
117 CharsetEncoder getCharsetEncoder() {
118 return charsetEncoder;
119 }
120
121 @Override
122 public Builder setCharset(final Charset charset) {
123 super.setCharset(charset);
124 charsetEncoder = newEncoder(getCharset());
125 return this;
126 }
127
128 /**
129 * Sets the charset encoder. Assumes that the caller has configured the encoder.
130 *
131 * @param newEncoder the charset encoder.
132 * @return {@code this} instance.
133 * @since 2.13.0
134 */
135 public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
136 charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
137 super.setCharset(charsetEncoder.charset());
138 return this;
139 }
140
141 }
142
143 private static final int NO_MARK = -1;
144
145 /**
146 * Constructs a new {@link Builder}.
147 *
148 * @return a new {@link Builder}.
149 * @since 2.12.0
150 */
151 public static Builder builder() {
152 return new Builder();
153 }
154
155 private static CharsetEncoder newEncoder(final Charset charset) {
156 // @formatter:off
157 return Charsets.toCharset(charset).newEncoder()
158 .onMalformedInput(CodingErrorAction.REPLACE)
159 .onUnmappableCharacter(CodingErrorAction.REPLACE);
160 // @formatter:on
161 }
162
163 private final ByteBuffer bBuf;
164 private int bBufMark; // position in bBuf
165 private final CharBuffer cBuf;
166 private int cBufMark; // position in cBuf
167 private final CharsetEncoder charsetEncoder;
168
169 private CharSequenceInputStream(final Builder builder) {
170 this.charsetEncoder = builder.charsetEncoder;
171 // Ensure that buffer is long enough to hold a complete character
172 this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
173 this.bBuf.flip();
174 this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
175 this.cBufMark = NO_MARK;
176 this.bBufMark = NO_MARK;
177 try {
178 fillBuffer();
179 } catch (final CharacterCodingException ex) {
180 // Reset everything without filling the buffer
181 // so the same exception can be thrown again later.
182 this.bBuf.clear();
183 this.bBuf.flip();
184 this.cBuf.rewind();
185 }
186 }
187
188 /**
189 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
190 *
191 * @param cs the input character sequence.
192 * @param charset the character set name to use.
193 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
194 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
195 */
196 @Deprecated
197 public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
198 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
199 }
200
201 /**
202 * Constructs a new instance.
203 *
204 * @param cs the input character sequence.
205 * @param charset the character set name to use, null maps to the default Charset.
206 * @param bufferSize the buffer size to use.
207 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
208 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
209 */
210 @Deprecated
211 public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
212 this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
213 }
214
215 /**
216 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
217 *
218 * @param cs the input character sequence.
219 * @param charset the character set name to use.
220 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
221 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
222 */
223 @Deprecated
224 public CharSequenceInputStream(final CharSequence cs, final String charset) {
225 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
226 }
227
228 /**
229 * Constructs a new instance.
230 *
231 * @param cs the input character sequence.
232 * @param charset the character set name to use, null maps to the default Charset.
233 * @param bufferSize the buffer size to use.
234 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
235 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
236 */
237 @Deprecated
238 public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
239 this(cs, Charsets.toCharset(charset), bufferSize);
240 }
241
242 /**
243 * Gets a lower bound on the number of bytes remaining in the byte stream.
244 *
245 * @return the count of bytes that can be read without blocking (or returning EOF).
246 * @throws IOException if an error occurs (probably not possible).
247 */
248 @Override
249 public int available() throws IOException {
250 return this.bBuf.remaining();
251 }
252
253 @Override
254 public void close() throws IOException {
255 bBuf.position(bBuf.limit());
256 }
257
258 /**
259 * Fills the byte output buffer from the input char buffer.
260 *
261 * @throws CharacterCodingException
262 * an error encoding data.
263 */
264 private void fillBuffer() throws CharacterCodingException {
265 this.bBuf.compact();
266 final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
267 if (result.isError()) {
268 result.throwException();
269 }
270 this.bBuf.flip();
271 }
272
273 /**
274 * Gets the CharsetEncoder.
275 *
276 * @return the CharsetEncoder.
277 */
278 CharsetEncoder getCharsetEncoder() {
279 return charsetEncoder;
280 }
281
282 /**
283 * {@inheritDoc}
284 * @param readLimit max read limit (ignored).
285 */
286 @Override
287 public synchronized void mark(final int readLimit) {
288 this.cBufMark = this.cBuf.position();
289 this.bBufMark = this.bBuf.position();
290 this.cBuf.mark();
291 this.bBuf.mark();
292 // It would be nice to be able to use mark & reset on the cBuf and bBuf;
293 // however the bBuf is re-used so that won't work
294 }
295
296 @Override
297 public boolean markSupported() {
298 return true;
299 }
300
301 @Override
302 public int read() throws IOException {
303 for (;;) {
304 if (this.bBuf.hasRemaining()) {
305 return this.bBuf.get() & 0xFF;
306 }
307 fillBuffer();
308 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
309 return EOF;
310 }
311 }
312 }
313
314 @Override
315 public int read(final byte[] b) throws IOException {
316 return read(b, 0, b.length);
317 }
318
319 @Override
320 public int read(final byte[] array, int off, int len) throws IOException {
321 IOUtils.checkFromIndexSize(array, off, len);
322 if (len == 0) {
323 return 0; // must return 0 for zero length read
324 }
325 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
326 return EOF;
327 }
328 int bytesRead = 0;
329 while (len > 0) {
330 if (this.bBuf.hasRemaining()) {
331 final int chunk = Math.min(this.bBuf.remaining(), len);
332 this.bBuf.get(array, off, chunk);
333 off += chunk;
334 len -= chunk;
335 bytesRead += chunk;
336 } else {
337 fillBuffer();
338 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
339 break;
340 }
341 }
342 }
343 return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
344 }
345
346 @Override
347 public synchronized void reset() throws IOException {
348 //
349 // This is not the most efficient implementation, as it re-encodes from the beginning.
350 //
351 // Since the bBuf is re-used, in general it's necessary to re-encode the data.
352 //
353 // It should be possible to apply some optimizations however:
354 // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
355 // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
356 // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
357 // restart from there.
358 //
359 if (this.cBufMark != NO_MARK) {
360 // if cBuf is at 0, we have not started reading anything, so skip re-encoding
361 if (this.cBuf.position() != 0) {
362 this.charsetEncoder.reset();
363 this.cBuf.rewind();
364 this.bBuf.rewind();
365 this.bBuf.limit(0); // rewind does not clear the buffer
366 while (this.cBuf.position() < this.cBufMark) {
367 this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
368 this.bBuf.limit(0);
369 fillBuffer();
370 }
371 }
372 if (this.cBuf.position() != this.cBufMark) {
373 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
374 "expected=" + this.cBufMark);
375 }
376 this.bBuf.position(this.bBufMark);
377 this.cBufMark = NO_MARK;
378 this.bBufMark = NO_MARK;
379 }
380 mark(0);
381 }
382
383 @Override
384 public long skip(long n) throws IOException {
385 //
386 // This could be made more efficient by using position to skip within the current buffer.
387 //
388 long skipped = 0;
389 while (n > 0 && available() > 0) {
390 this.read();
391 n--;
392 skipped++;
393 }
394 return skipped;
395 }
396
397 }