001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.io.input;
019
020import static org.apache.commons.io.IOUtils.EOF;
021
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.CharacterCodingException;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetEncoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031import java.util.Objects;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.charset.CharsetEncoders;
037import org.apache.commons.io.function.Uncheck;
038
039/**
040 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
041 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
042 * <p>
043 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
044 * </p>
045 * <p>
046 * To build an instance, use {@link Builder}.
047 * </p>
048 *
049 * @see Builder
050 * @since 2.2
051 */
052public class CharSequenceInputStream extends InputStream {
053
054    //@formatter:off
055    /**
056     * Builds a new {@link CharSequenceInputStream}.
057     *
058     * <p>
059     * For example:
060     * </p>
061     * <h2>Using a Charset</h2>
062     * <pre>{@code
063     * CharSequenceInputStream s = CharSequenceInputStream.builder()
064     *   .setBufferSize(8192)
065     *   .setCharSequence("String")
066     *   .setCharset(Charset.defaultCharset())
067     *   .get();}
068     * </pre>
069     * <h2>Using a CharsetEncoder</h2>
070     * <pre>{@code
071     * CharSequenceInputStream s = CharSequenceInputStream.builder()
072     *   .setBufferSize(8192)
073     *   .setCharSequence("String")
074     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
075     *     .onMalformedInput(CodingErrorAction.REPLACE)
076     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
077     *   .get();}
078     * </pre>
079     *
080     * @see #get()
081     * @since 2.13.0
082     */
083    //@formatter:on
    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

        /**
         * The encoder handed to the stream being built. Initialized from the builder's charset with
         * {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters, and replaced by
         * {@link #setCharset(Charset)} and {@link #setCharsetEncoder(CharsetEncoder)}.
         */
        private CharsetEncoder charsetEncoder = newEncoder(getCharset());

        /**
         * Constructs a new builder of {@link CharSequenceInputStream}.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@link CharSequenceInputStream}.
         * <p>
         * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getCharSequence()} gets the target aspect.</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link CharsetEncoder}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
         * @see #getUnchecked()
         */
        @Override
        public CharSequenceInputStream get() {
            // NOTE(review): Uncheck.get presumably rethrows a checked IOException from the constructor as an
            // unchecked exception - confirm against Uncheck.
            return Uncheck.get(() -> new CharSequenceInputStream(this));
        }

        /**
         * Gets the charset encoder currently configured on this builder.
         *
         * @return the charset encoder.
         */
        CharsetEncoder getCharsetEncoder() {
            return charsetEncoder;
        }

        /**
         * Sets the charset and replaces the current encoder with a new one created for the resulting charset.
         * <p>
         * The new encoder uses {@link CodingErrorAction#REPLACE} for malformed input and unmappable characters;
         * an encoder previously set with {@link #setCharsetEncoder(CharsetEncoder)} is discarded.
         * </p>
         *
         * @param charset the charset to use.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Re-read the charset from the builder rather than using the argument directly.
            charsetEncoder = newEncoder(getCharset());
            return this;
        }

        /**
         * Sets the charset encoder. Assumes that the caller has configured the encoder.
         * <p>
         * The builder's charset is updated to the charset of the resulting encoder.
         * </p>
         *
         * @param newEncoder the charset encoder.
         * @return {@code this} instance.
         * @since 2.13.0
         */
        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
            // NOTE(review): the supplier presumably provides the fallback encoder (default charset, REPLACE
            // actions) when newEncoder is null - confirm against CharsetEncoders.toCharsetEncoder.
            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
            // Keep the builder's charset in step with the encoder that will actually be used.
            super.setCharset(charsetEncoder.charset());
            return this;
        }

    }
143
    /** Sentinel stored in the saved mark positions when no mark is currently recorded. */
    private static final int NO_MARK = -1;
145
    /**
     * Constructs a new {@link Builder}.
     * <p>
     * Configure the returned builder and call {@link Builder#get()} to create the stream.
     * </p>
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }
155
156    private static CharsetEncoder newEncoder(final Charset charset) {
157        // @formatter:off
158        return Charsets.toCharset(charset).newEncoder()
159                .onMalformedInput(CodingErrorAction.REPLACE)
160                .onUnmappableCharacter(CodingErrorAction.REPLACE);
161        // @formatter:on
162    }
163
    /** Encoded bytes waiting to be read; refilled from {@link #cBuf} by {@link #fillBuffer()}. */
    private final ByteBuffer bBuf;
    /** Position in {@link #bBuf} saved by {@link #mark(int)}, or {@link #NO_MARK} when none is recorded. */
    private int bBufMark; // position in bBuf
    /** The input characters; its position is the index of the next character to encode. */
    private final CharBuffer cBuf;
    /** Position in {@link #cBuf} saved by {@link #mark(int)}, or {@link #NO_MARK} when none is recorded. */
    private int cBufMark; // position in cBuf
    /** Encoder used to turn characters from {@link #cBuf} into bytes in {@link #bBuf}. */
    private final CharsetEncoder charsetEncoder;
169
170    private CharSequenceInputStream(final Builder builder) {
171        this.charsetEncoder = builder.charsetEncoder;
172        // Ensure that buffer is long enough to hold a complete character
173        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
174        this.bBuf.flip();
175        this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
176        this.cBufMark = NO_MARK;
177        this.bBufMark = NO_MARK;
178        try {
179            fillBuffer();
180        } catch (final CharacterCodingException ex) {
181            // Reset everything without filling the buffer
182            // so the same exception can be thrown again later.
183            this.bBuf.clear();
184            this.bBuf.flip();
185            this.cBuf.rewind();
186        }
187    }
188
    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }
201
    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        // Delegates to the builder-based constructor so both construction paths share one implementation.
        this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
    }
215
    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }
228
    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        // Resolve the name to a Charset, then delegate to the Charset-based constructor.
        this(cs, Charsets.toCharset(charset), bufferSize);
    }
242
    /**
     * Gets a lower bound on the number of bytes remaining in the byte stream.
     * <p>
     * Only bytes that are already encoded in the internal byte buffer are counted. Characters that have not
     * been encoded yet are not, so this method may return 0 before the end of the stream has been reached.
     * </p>
     *
     * @return the count of bytes that can be read without blocking (or returning EOF).
     * @throws IOException if an error occurs (probably not possible).
     */
    @Override
    public int available() throws IOException {
        return this.bBuf.remaining();
    }
253
254    @Override
255    public void close() throws IOException {
256        bBuf.position(bBuf.limit());
257    }
258
259    /**
260     * Fills the byte output buffer from the input char buffer.
261     *
262     * @throws CharacterCodingException
263     *             an error encoding data.
264     */
265    private void fillBuffer() throws CharacterCodingException {
266        this.bBuf.compact();
267        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
268        if (result.isError()) {
269            result.throwException();
270        }
271        this.bBuf.flip();
272    }
273
    /**
     * Gets the CharsetEncoder this stream uses to encode its characters into bytes.
     *
     * @return the CharsetEncoder.
     */
    CharsetEncoder getCharsetEncoder() {
        return charsetEncoder;
    }
282
283    /**
284     * {@inheritDoc}
285     * @param readLimit max read limit (ignored).
286     */
287    @Override
288    public synchronized void mark(final int readLimit) {
289        this.cBufMark = this.cBuf.position();
290        this.bBufMark = this.bBuf.position();
291        this.cBuf.mark();
292        this.bBuf.mark();
293        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
294        // however the bBuf is re-used so that won't work
295    }
296
    /**
     * Tests whether this stream supports {@link #mark(int)} and {@link #reset()}.
     *
     * @return always {@code true}.
     */
    @Override
    public boolean markSupported() {
        return true;
    }
301
302    @Override
303    public int read() throws IOException {
304        for (;;) {
305            if (this.bBuf.hasRemaining()) {
306                return this.bBuf.get() & 0xFF;
307            }
308            fillBuffer();
309            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
310                return EOF;
311            }
312        }
313    }
314
    /**
     * Reads up to {@code b.length} encoded bytes into the given array; equivalent to
     * {@code read(b, 0, b.length)}.
     *
     * @param b the destination array.
     * @return the number of bytes read, 0 if the array is empty, or EOF (-1) if there is no more data.
     * @throws NullPointerException if {@code b} is null.
     * @throws IOException if an encoding error occurs.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }
319
320    @Override
321    public int read(final byte[] array, int off, int len) throws IOException {
322        Objects.requireNonNull(array, "array");
323        if (len < 0 || off + len > array.length) {
324            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
325        }
326        if (len == 0) {
327            return 0; // must return 0 for zero length read
328        }
329        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
330            return EOF;
331        }
332        int bytesRead = 0;
333        while (len > 0) {
334            if (this.bBuf.hasRemaining()) {
335                final int chunk = Math.min(this.bBuf.remaining(), len);
336                this.bBuf.get(array, off, chunk);
337                off += chunk;
338                len -= chunk;
339                bytesRead += chunk;
340            } else {
341                fillBuffer();
342                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
343                    break;
344                }
345            }
346        }
347        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
348    }
349
    /**
     * Repositions this stream to the position recorded by the last call to {@link #mark(int)}.
     * <p>
     * If no mark is recorded, the position is left unchanged. In both cases the position the stream ends up at
     * is recorded as the new mark.
     * </p>
     *
     * @throws IOException if re-encoding the input up to the mark fails.
     * @throws IllegalStateException if re-encoding does not arrive at the marked character position.
     */
    @Override
    public synchronized void reset() throws IOException {
        //
        // This is not the most efficient implementation, as it re-encodes from the beginning.
        //
        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
        //
        // It should be possible to apply some optimizations however:
        // + Use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled
        //   since the mark. The code would have to catch InvalidMarkException - it does not seem possible to
        //   check if the mark is valid otherwise.
        // + Try saving the state of the cBuf before each fillBuffer; it might be possible to restart from there.
        //
        if (this.cBufMark != NO_MARK) {
            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
            if (this.cBuf.position() != 0) {
                this.charsetEncoder.reset();
                this.cBuf.rewind();
                this.bBuf.rewind();
                this.bBuf.limit(0); // rewind does not clear the buffer
                // Replay the fills from the start until the encoder has consumed the input up to the mark.
                while (this.cBuf.position() < this.cBufMark) {
                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                    this.bBuf.limit(0);
                    fillBuffer();
                }
            }
            // The replay must stop exactly at the marked character position; anything else means the saved
            // byte position would not line up with the buffer contents.
            if (this.cBuf.position() != this.cBufMark) {
                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
                        "expected=" + this.cBufMark);
            }
            this.bBuf.position(this.bBufMark);
            this.cBufMark = NO_MARK;
            this.bBufMark = NO_MARK;
        }
        // Record the position just restored (or the current one) as the mark for the next reset.
        mark(0);
    }
386
387    @Override
388    public long skip(long n) throws IOException {
389        //
390        // This could be made more efficient by using position to skip within the current buffer.
391        //
392        long skipped = 0;
393        while (n > 0 && available() > 0) {
394            this.read();
395            n--;
396            skipped++;
397        }
398        return skipped;
399    }
400
401}