001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.io.input;
019
020import static org.apache.commons.io.IOUtils.EOF;
021
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.CharacterCodingException;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetEncoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031
032import org.apache.commons.io.Charsets;
033import org.apache.commons.io.IOUtils;
034import org.apache.commons.io.build.AbstractStreamBuilder;
035import org.apache.commons.io.charset.CharsetEncoders;
036import org.apache.commons.io.function.Uncheck;
037
038/**
039 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
040 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
041 * <p>
042 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
043 * </p>
044 * <p>
045 * To build an instance, use {@link Builder}.
046 * </p>
047 *
048 * @see Builder
049 * @since 2.2
050 */
051public class CharSequenceInputStream extends InputStream {
052
053    //@formatter:off
054    /**
055     * Builds a new {@link CharSequenceInputStream}.
056     *
057     * <p>
058     * For example:
059     * </p>
060     * <h2>Using a Charset</h2>
061     * <pre>{@code
062     * CharSequenceInputStream s = CharSequenceInputStream.builder()
063     *   .setBufferSize(8192)
064     *   .setCharSequence("String")
065     *   .setCharset(Charset.defaultCharset())
066     *   .get();}
067     * </pre>
068     * <h2>Using a CharsetEncoder</h2>
069     * <pre>{@code
070     * CharSequenceInputStream s = CharSequenceInputStream.builder()
071     *   .setBufferSize(8192)
072     *   .setCharSequence("String")
073     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
074     *     .onMalformedInput(CodingErrorAction.REPLACE)
075     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
076     *   .get();}
077     * </pre>
078     *
079     * @see #get()
080     * @since 2.13.0
081     */
082    //@formatter:on
083    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
084
085        private CharsetEncoder charsetEncoder = newEncoder(getCharset());
086
087        /**
088         * Constructs a new builder of {@link CharSequenceInputStream}.
089         */
090        public Builder() {
091            // empty
092        }
093
094        /**
095         * Builds a new {@link CharSequenceInputStream}.
096         * <p>
097         * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
098         * </p>
099         * <p>
100         * This builder uses the following aspects:
101         * </p>
102         * <ul>
103         * <li>{@link #getCharSequence()} gets the target aspect.</li>
104         * <li>{@link #getBufferSize()}</li>
105         * <li>{@link CharsetEncoder}</li>
106         * </ul>
107         *
108         * @return a new instance.
109         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
110         * @see #getUnchecked()
111         */
112        @Override
113        public CharSequenceInputStream get() {
114            return Uncheck.get(() -> new CharSequenceInputStream(this));
115        }
116
117        CharsetEncoder getCharsetEncoder() {
118            return charsetEncoder;
119        }
120
121        @Override
122        public Builder setCharset(final Charset charset) {
123            super.setCharset(charset);
124            charsetEncoder = newEncoder(getCharset());
125            return this;
126        }
127
128        /**
129         * Sets the charset encoder. Assumes that the caller has configured the encoder.
130         *
131         * @param newEncoder the charset encoder.
132         * @return {@code this} instance.
133         * @since 2.13.0
134         */
135        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
136            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
137            super.setCharset(charsetEncoder.charset());
138            return this;
139        }
140
141    }
142
143    private static final int NO_MARK = -1;
144
145    /**
146     * Constructs a new {@link Builder}.
147     *
148     * @return a new {@link Builder}.
149     * @since 2.12.0
150     */
151    public static Builder builder() {
152        return new Builder();
153    }
154
155    private static CharsetEncoder newEncoder(final Charset charset) {
156        // @formatter:off
157        return Charsets.toCharset(charset).newEncoder()
158                .onMalformedInput(CodingErrorAction.REPLACE)
159                .onUnmappableCharacter(CodingErrorAction.REPLACE);
160        // @formatter:on
161    }
162
163    private final ByteBuffer bBuf;
164    private int bBufMark; // position in bBuf
165    private final CharBuffer cBuf;
166    private int cBufMark; // position in cBuf
167    private final CharsetEncoder charsetEncoder;
168
169    private CharSequenceInputStream(final Builder builder) {
170        this.charsetEncoder = builder.charsetEncoder;
171        // Ensure that buffer is long enough to hold a complete character
172        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
173        this.bBuf.flip();
174        this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
175        this.cBufMark = NO_MARK;
176        this.bBufMark = NO_MARK;
177        try {
178            fillBuffer();
179        } catch (final CharacterCodingException ex) {
180            // Reset everything without filling the buffer
181            // so the same exception can be thrown again later.
182            this.bBuf.clear();
183            this.bBuf.flip();
184            this.cBuf.rewind();
185        }
186    }
187
188    /**
189     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
190     *
191     * @param cs the input character sequence.
192     * @param charset the character set name to use.
193     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
194     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
195     */
196    @Deprecated
197    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
198        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
199    }
200
201    /**
202     * Constructs a new instance.
203     *
204     * @param cs the input character sequence.
205     * @param charset the character set name to use, null maps to the default Charset.
206     * @param bufferSize the buffer size to use.
207     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
208     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
209     */
210    @Deprecated
211    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
212        this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
213    }
214
215    /**
216     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
217     *
218     * @param cs the input character sequence.
219     * @param charset the character set name to use.
220     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
221     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
222     */
223    @Deprecated
224    public CharSequenceInputStream(final CharSequence cs, final String charset) {
225        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
226    }
227
228    /**
229     * Constructs a new instance.
230     *
231     * @param cs the input character sequence.
232     * @param charset the character set name to use, null maps to the default Charset.
233     * @param bufferSize the buffer size to use.
234     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
235     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
236     */
237    @Deprecated
238    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
239        this(cs, Charsets.toCharset(charset), bufferSize);
240    }
241
242    /**
243     * Gets a lower bound on the number of bytes remaining in the byte stream.
244     *
245     * @return the count of bytes that can be read without blocking (or returning EOF).
246     * @throws IOException if an error occurs (probably not possible).
247     */
248    @Override
249    public int available() throws IOException {
250        return this.bBuf.remaining();
251    }
252
253    @Override
254    public void close() throws IOException {
255        bBuf.position(bBuf.limit());
256    }
257
258    /**
259     * Fills the byte output buffer from the input char buffer.
260     *
261     * @throws CharacterCodingException
262     *             an error encoding data.
263     */
264    private void fillBuffer() throws CharacterCodingException {
265        this.bBuf.compact();
266        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
267        if (result.isError()) {
268            result.throwException();
269        }
270        this.bBuf.flip();
271    }
272
273    /**
274     * Gets the CharsetEncoder.
275     *
276     * @return the CharsetEncoder.
277     */
278    CharsetEncoder getCharsetEncoder() {
279        return charsetEncoder;
280    }
281
282    /**
283     * {@inheritDoc}
284     * @param readLimit max read limit (ignored).
285     */
286    @Override
287    public synchronized void mark(final int readLimit) {
288        this.cBufMark = this.cBuf.position();
289        this.bBufMark = this.bBuf.position();
290        this.cBuf.mark();
291        this.bBuf.mark();
292        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
293        // however the bBuf is re-used so that won't work
294    }
295
296    @Override
297    public boolean markSupported() {
298        return true;
299    }
300
301    @Override
302    public int read() throws IOException {
303        for (;;) {
304            if (this.bBuf.hasRemaining()) {
305                return this.bBuf.get() & 0xFF;
306            }
307            fillBuffer();
308            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
309                return EOF;
310            }
311        }
312    }
313
314    @Override
315    public int read(final byte[] b) throws IOException {
316        return read(b, 0, b.length);
317    }
318
319    @Override
320    public int read(final byte[] array, int off, int len) throws IOException {
321        IOUtils.checkFromIndexSize(array, off, len);
322        if (len == 0) {
323            return 0; // must return 0 for zero length read
324        }
325        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
326            return EOF;
327        }
328        int bytesRead = 0;
329        while (len > 0) {
330            if (this.bBuf.hasRemaining()) {
331                final int chunk = Math.min(this.bBuf.remaining(), len);
332                this.bBuf.get(array, off, chunk);
333                off += chunk;
334                len -= chunk;
335                bytesRead += chunk;
336            } else {
337                fillBuffer();
338                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
339                    break;
340                }
341            }
342        }
343        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
344    }
345
346    @Override
347    public synchronized void reset() throws IOException {
348        //
349        // This is not the most efficient implementation, as it re-encodes from the beginning.
350        //
351        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
352        //
353        // It should be possible to apply some optimizations however:
354        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
355        // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
356        // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
357        // restart from there.
358        //
359        if (this.cBufMark != NO_MARK) {
360            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
361            if (this.cBuf.position() != 0) {
362                this.charsetEncoder.reset();
363                this.cBuf.rewind();
364                this.bBuf.rewind();
365                this.bBuf.limit(0); // rewind does not clear the buffer
366                while (this.cBuf.position() < this.cBufMark) {
367                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
368                    this.bBuf.limit(0);
369                    fillBuffer();
370                }
371            }
372            if (this.cBuf.position() != this.cBufMark) {
373                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
374                        "expected=" + this.cBufMark);
375            }
376            this.bBuf.position(this.bBufMark);
377            this.cBufMark = NO_MARK;
378            this.bBufMark = NO_MARK;
379        }
380        mark(0);
381    }
382
383    @Override
384    public long skip(long n) throws IOException {
385        //
386        // This could be made more efficient by using position to skip within the current buffer.
387        //
388        long skipped = 0;
389        while (n > 0 && available() > 0) {
390            this.read();
391            n--;
392            skipped++;
393        }
394        return skipped;
395    }
396
397}