001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.Reader;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.Charset;
027import java.nio.charset.CharsetEncoder;
028import java.nio.charset.CoderResult;
029import java.nio.charset.CodingErrorAction;
030
031/**
032 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
033 * and transforms it to a byte stream using a specified charset encoding. The stream
034 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
035 * encodings supported by the JRE are handled correctly. In particular for charsets such as
036 * UTF-16, the implementation ensures that one and only one byte order marker
037 * is produced.
038 * <p>
039 * Since in general it is not possible to predict the number of characters to be read from the
040 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
041 * the {@link Reader} are buffered. There is therefore no well defined correlation
042 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
043 * This also implies that in general there is no need to wrap the underlying {@link Reader}
044 * in a {@link java.io.BufferedReader}.
045 * <p>
046 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
047 * in the following example, reading from {@code in2} would return the same byte
048 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
049 * with respect to the charset encoding):
050 * <pre>
051 * InputStream in = ...
052 * Charset cs = ...
053 * InputStreamReader reader = new InputStreamReader(in, cs);
054 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
055 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
056 * except that the control flow is reversed: both classes transform a character stream
057 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
058 * while {@link ReaderInputStream} pulls it from the underlying stream.
059 * <p>
060 * Note that while there are use cases where there is no alternative to using
061 * this class, very often the need to use this class is an indication of a flaw
062 * in the design of the code. This class is typically used in situations where an existing
063 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
064 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
065 * where this problem may appear is when implementing the {@link javax.activation.DataSource}
066 * interface from the Java Activation Framework.
067 * <p>
068 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
069 * read operation will block or not, it is not possible to provide a meaningful
070 * implementation of the {@link InputStream#available()} method. A call to this method
071 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
072 * <p>
073 * Instances of {@link ReaderInputStream} are not thread safe.
074 *
075 * @see org.apache.commons.io.output.WriterOutputStream
076 *
077 * @since 2.0
078 */
079public class ReaderInputStream extends InputStream {
080    private static final int DEFAULT_BUFFER_SIZE = 1024;
081
082    private final Reader reader;
083    private final CharsetEncoder encoder;
084
085    /**
086     * CharBuffer used as input for the decoder. It should be reasonably
087     * large as we read data from the underlying Reader into this buffer.
088     */
089    private final CharBuffer encoderIn;
090
091    /**
092     * ByteBuffer used as output for the decoder. This buffer can be small
093     * as it is only used to transfer data from the decoder to the
094     * buffer provided by the caller.
095     */
096    private final ByteBuffer encoderOut;
097
098    private CoderResult lastCoderResult;
099    private boolean endOfInput;
100
101    /**
102     * Construct a new {@link ReaderInputStream}.
103     *
104     * @param reader the target {@link Reader}
105     * @param encoder the charset encoder
106     * @since 2.1
107     */
108    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
109        this(reader, encoder, DEFAULT_BUFFER_SIZE);
110    }
111
112    /**
113     * Construct a new {@link ReaderInputStream}.
114     *
115     * @param reader the target {@link Reader}
116     * @param encoder the charset encoder
117     * @param bufferSize the size of the input buffer in number of characters
118     * @since 2.1
119     */
120    public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
121        this.reader = reader;
122        this.encoder = encoder;
123        this.encoderIn = CharBuffer.allocate(bufferSize);
124        this.encoderIn.flip();
125        this.encoderOut = ByteBuffer.allocate(128);
126        this.encoderOut.flip();
127    }
128
129    /**
130     * Construct a new {@link ReaderInputStream}.
131     *
132     * @param reader the target {@link Reader}
133     * @param charset the charset encoding
134     * @param bufferSize the size of the input buffer in number of characters
135     */
136    public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
137        this(reader,
138             charset.newEncoder()
139                    .onMalformedInput(CodingErrorAction.REPLACE)
140                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
141             bufferSize);
142    }
143
144    /**
145     * Construct a new {@link ReaderInputStream} with a default input buffer size of
146     * 1024 characters.
147     *
148     * @param reader the target {@link Reader}
149     * @param charset the charset encoding
150     */
151    public ReaderInputStream(final Reader reader, final Charset charset) {
152        this(reader, charset, DEFAULT_BUFFER_SIZE);
153    }
154
155    /**
156     * Construct a new {@link ReaderInputStream}.
157     *
158     * @param reader the target {@link Reader}
159     * @param charsetName the name of the charset encoding
160     * @param bufferSize the size of the input buffer in number of characters
161     */
162    public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
163        this(reader, Charset.forName(charsetName), bufferSize);
164    }
165
166    /**
167     * Construct a new {@link ReaderInputStream} with a default input buffer size of
168     * 1024 characters.
169     *
170     * @param reader the target {@link Reader}
171     * @param charsetName the name of the charset encoding
172     */
173    public ReaderInputStream(final Reader reader, final String charsetName) {
174        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
175    }
176
177    /**
178     * Construct a new {@link ReaderInputStream} that uses the default character encoding
179     * with a default input buffer size of 1024 characters.
180     *
181     * @param reader the target {@link Reader}
182     * @deprecated 2.5 use {@link #ReaderInputStream(Reader, Charset)} instead
183     */
184    @Deprecated
185    public ReaderInputStream(final Reader reader) {
186        this(reader, Charset.defaultCharset());
187    }
188
189    /**
190     * Fills the internal char buffer from the reader.
191     *
192     * @throws IOException
193     *             If an I/O error occurs
194     */
195    private void fillBuffer() throws IOException {
196        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
197            encoderIn.compact();
198            final int position = encoderIn.position();
199            // We don't use Reader#read(CharBuffer) here because it is more efficient
200            // to write directly to the underlying char array (the default implementation
201            // copies data to a temporary char array).
202            final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
203            if (c == EOF) {
204                endOfInput = true;
205            } else {
206                encoderIn.position(position+c);
207            }
208            encoderIn.flip();
209        }
210        encoderOut.compact();
211        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
212        encoderOut.flip();
213    }
214
215    /**
216     * Read the specified number of bytes into an array.
217     *
218     * @param b the byte array to read into
219     * @param off the offset to start reading bytes into
220     * @param len the number of bytes to read
221     * @return the number of bytes read or <code>-1</code>
222     *         if the end of the stream has been reached
223     * @throws IOException if an I/O error occurs
224     */
225    @Override
226    public int read(final byte[] b, int off, int len) throws IOException {
227        if (b == null) {
228            throw new NullPointerException("Byte array must not be null");
229        }
230        if (len < 0 || off < 0 || (off + len) > b.length) {
231            throw new IndexOutOfBoundsException("Array Size=" + b.length +
232                    ", offset=" + off + ", length=" + len);
233        }
234        int read = 0;
235        if (len == 0) {
236            return 0; // Always return 0 if len == 0
237        }
238        while (len > 0) {
239            if (encoderOut.hasRemaining()) {
240                final int c = Math.min(encoderOut.remaining(), len);
241                encoderOut.get(b, off, c);
242                off += c;
243                len -= c;
244                read += c;
245            } else {
246                fillBuffer();
247                if (endOfInput && !encoderOut.hasRemaining()) {
248                    break;
249                }
250            }
251        }
252        return read == 0 && endOfInput ? EOF : read;
253    }
254
255    /**
256     * Read the specified number of bytes into an array.
257     *
258     * @param b the byte array to read into
259     * @return the number of bytes read or <code>-1</code>
260     *         if the end of the stream has been reached
261     * @throws IOException if an I/O error occurs
262     */
263    @Override
264    public int read(final byte[] b) throws IOException {
265        return read(b, 0, b.length);
266    }
267
268    /**
269     * Read a single byte.
270     *
271     * @return either the byte read or <code>-1</code> if the end of the stream
272     *         has been reached
273     * @throws IOException if an I/O error occurs
274     */
275    @Override
276    public int read() throws IOException {
277        for (;;) {
278            if (encoderOut.hasRemaining()) {
279                return encoderOut.get() & 0xFF;
280            }
281            fillBuffer();
282            if (endOfInput && !encoderOut.hasRemaining()) {
283                return EOF;
284            }
285        }
286    }
287
288    /**
289     * Close the stream. This method will cause the underlying {@link Reader}
290     * to be closed.
291     * @throws IOException if an I/O error occurs
292     */
293    @Override
294    public void close() throws IOException {
295        reader.close();
296    }
297}