001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.io.input;
019
020import static org.apache.commons.io.IOUtils.EOF;
021
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.CharacterCodingException;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetEncoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031
/**
 * {@link InputStream} implementation that can read from String, StringBuffer,
 * StringBuilder or CharBuffer.
 * <p>
 * The characters are encoded lazily, one internal byte buffer at a time. Malformed input and
 * unmappable characters are replaced rather than reported as errors.
 * <p>
 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
 *
 * @since 2.2
 */
public class CharSequenceInputStream extends InputStream {

    private static final int BUFFER_SIZE = 2048;

    private static final int NO_MARK = -1;

    /** Value returned by the read methods once the end of the stream has been reached. */
    private static final int END_OF_STREAM = -1;

    private final CharsetEncoder encoder;
    private final CharBuffer cbuf;
    private final ByteBuffer bbuf;

    private int cbufMark; // position in cbuf at the time of the mark, or NO_MARK
    private int bbufMark; // position in bbuf at the time of the mark, or NO_MARK

    /**
     * Constructor.
     *
     * @param cs the input character sequence
     * @param charset the character set to use
     * @param bufferSize the buffer size to use; the internal buffer may be allocated larger than this so
     *        that a surrogate pair can always be encoded in one step
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
     */
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        super();
        this.encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // Ensure that buffer is long enough to hold a complete character
        final float maxBytesPerChar = encoder.maxBytesPerChar();
        if (bufferSize < maxBytesPerChar) {
            throw new IllegalArgumentException("Buffer size " + bufferSize + " is less than maxBytesPerChar " +
                    maxBytesPerChar);
        }
        // A surrogate pair (two chars) is encoded as a single unit. If the byte buffer cannot hold it the
        // encoder reports OVERFLOW without consuming any input and reading would never make progress, so
        // always leave room for two chars' worth of output.
        final int minCapacity = (int) Math.ceil(2 * maxBytesPerChar);
        this.bbuf = ByteBuffer.allocate(Math.max(bufferSize, minCapacity));
        this.bbuf.flip(); // start out empty (position == limit == 0)
        this.cbuf = CharBuffer.wrap(cs);
        this.cbufMark = NO_MARK;
        this.bbufMark = NO_MARK;
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}.
     *
     * @param cs the input character sequence
     * @param charset the character set name to use
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character,
     *         or if the charset name is illegal or unsupported (see {@link Charset#forName(String)})
     */
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        this(cs, Charset.forName(charset), bufferSize);
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}
     * with a buffer size of 2048.
     *
     * @param cs the input character sequence
     * @param charset the character set to use
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
     */
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, BUFFER_SIZE);
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, String, int)}
     * with a buffer size of 2048.
     *
     * @param cs the input character sequence
     * @param charset the character set name to use
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character,
     *         or if the charset name is illegal or unsupported (see {@link Charset#forName(String)})
     */
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, BUFFER_SIZE);
    }

    /**
     * Fills the byte output buffer from the input char buffer.
     * <p>
     * Callers only invoke this when the byte buffer is empty and the char buffer still has input; the
     * {@link #reset()} logic relies on that to reproduce the same buffer contents when re-encoding.
     * <p>
     * NOTE(review): {@code encoder.flush()} is never called, so a charset whose encoder emits trailing
     * bytes on flush may produce truncated output - confirm whether such charsets need to be supported.
     *
     * @throws CharacterCodingException
     *             an error encoding data
     */
    private void fillBuffer() throws CharacterCodingException {
        this.bbuf.compact(); // switch to write mode, keeping any unread bytes
        final CoderResult result = this.encoder.encode(this.cbuf, this.bbuf, true);
        if (result.isError()) {
            result.throwException();
        }
        this.bbuf.flip(); // back to read mode
    }

    /**
     * Reads up to {@code len} encoded bytes into {@code b}, starting at {@code off}.
     *
     * @param b the destination array
     * @param off the index in {@code b} at which to store the first byte
     * @param len the maximum number of bytes to read
     * @return the number of bytes stored, {@code 0} if {@code len} is zero, or {@code -1} if the end of
     *         the stream has been reached
     * @throws NullPointerException if {@code b} is null
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or if
     *         {@code len} is greater than {@code b.length - off}
     * @throws IOException if the characters cannot be encoded
     */
    @Override
    public int read(final byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array is null");
        }
        // Written as a subtraction so that off + len cannot overflow and slip past the check
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + b.length +
                    ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // must return 0 for zero length read
        }
        int bytesRead = 0;
        while (len > 0) {
            if (this.bbuf.hasRemaining()) {
                final int chunk = Math.min(this.bbuf.remaining(), len);
                this.bbuf.get(b, off, chunk);
                off += chunk;
                len -= chunk;
                bytesRead += chunk;
            } else if (this.cbuf.hasRemaining()) {
                fillBuffer();
            } else {
                // Nothing buffered and nothing left to encode. Do not refill here: that would empty
                // the byte buffer and change the position a subsequent mark() records.
                break;
            }
        }
        return bytesRead == 0 ? END_OF_STREAM : bytesRead;
    }

    /**
     * Reads the next encoded byte.
     *
     * @return the byte as an int in the range 0 to 255, or {@code -1} if the end of the stream has been
     *         reached
     * @throws IOException if the characters cannot be encoded
     */
    @Override
    public int read() throws IOException {
        while (!this.bbuf.hasRemaining()) {
            if (!this.cbuf.hasRemaining()) {
                return END_OF_STREAM;
            }
            fillBuffer();
        }
        return this.bbuf.get() & 0xFF;
    }

    /**
     * Reads encoded bytes into the whole of {@code b}; equivalent to {@code read(b, 0, b.length)}.
     *
     * @param b the destination array
     * @return the number of bytes stored, or {@code -1} if the end of the stream has been reached
     * @throws IOException if the characters cannot be encoded
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Skips up to {@code n} encoded bytes by reading and discarding them.
     *
     * @param n the number of bytes to skip; values less than or equal to zero skip nothing
     * @return the number of bytes actually skipped
     * @throws IOException if the characters cannot be encoded
     */
    @Override
    public long skip(long n) throws IOException {
        /*
         * This could be made more efficient by using position to skip within the current buffer.
         */
        long skipped = 0;
        // Rely on what read() actually returns rather than on the available() estimate
        while (skipped < n && this.read() != END_OF_STREAM) {
            skipped++;
        }
        return skipped;
    }

    /**
     * Return an estimate of the number of bytes remaining in the byte stream.
     * <p>
     * The value is the number of already encoded bytes waiting in the byte buffer plus the number of
     * characters that have not been encoded yet. The number of bytes produced per character depends on
     * the charset, so the real number of remaining bytes may differ from this estimate.
     *
     * @return the estimated count of bytes that can still be read
     *
     * @throws IOException if an error occurs (probably not possible)
     */
    @Override
    public int available() throws IOException {
        // Note that the previous implementation (2.4) could return zero even though there were
        // encoded bytes still available.
        return this.bbuf.remaining() + this.cbuf.remaining();
    }

    /**
     * Does nothing; this stream holds no resources that need releasing.
     *
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void close() throws IOException {
    }

    /**
     * {@inheritDoc}
     * @param readlimit max read limit (ignored)
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // The positions are stored explicitly instead of using the buffers' own mark support: bbuf is
        // re-used on every refill, which would invalidate a mark set on it.
        this.cbufMark = this.cbuf.position();
        this.bbufMark = this.bbuf.position();
    }

    /**
     * Repositions this stream to the position at the time {@link #mark(int)} was last called. The mark
     * stays in place, so this method may be called repeatedly. If {@code mark} has never been called,
     * this method does nothing.
     *
     * @throws IOException if the characters cannot be re-encoded
     * @throws IllegalStateException if re-encoding does not arrive back at the marked position
     */
    @Override
    public synchronized void reset() throws IOException {
        /*
         * This is not the most efficient implementation, as it re-encodes from the beginning.
         *
         * Since the bbuf is re-used, in general it's necessary to re-encode the data.
         *
         * It should be possible to apply some optimisations however:
         * + use mark/reset on the cbuf and bbuf. This would only work if the buffer had not been (re)filled since
         * the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
         * valid otherwise. + Try saving the state of the cbuf before each fillBuffer; it might be possible to
         * restart from there.
         */
        if (this.cbufMark == NO_MARK) {
            return;
        }
        // if cbuf is at 0, we have not started reading anything, so skip re-encoding
        if (this.cbuf.position() != 0) {
            this.encoder.reset();
            this.cbuf.rewind();
            this.bbuf.rewind();
            this.bbuf.limit(0); // rewind does not clear the buffer
            while (this.cbuf.position() < this.cbufMark) {
                this.bbuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                this.bbuf.limit(0);
                fillBuffer();
            }
        }
        if (this.cbuf.position() != this.cbufMark) {
            throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cbuf.position() + " " +
                    "expected=" + this.cbufMark);
        }
        this.bbuf.position(this.bbufMark);
        // The mark is deliberately kept so that reset() can be called again
    }

    /**
     * Reports that this stream supports {@link #mark(int)} and {@link #reset()}.
     *
     * @return always {@code true}
     */
    @Override
    public boolean markSupported() {
        return true;
    }

}