001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.input;
018    
019    import java.io.IOException;
020    import java.io.InputStream;
021    import java.io.Reader;
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.Charset;
025    import java.nio.charset.CharsetEncoder;
026    import java.nio.charset.CoderResult;
027    import java.nio.charset.CodingErrorAction;
028    
029    /**
030     * {@link InputStream} implementation that reads a character stream from a {@link Reader}
031     * and transforms it to a byte stream using a specified charset encoding. The stream
032     * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
033     * encodings supported by the JRE are handled correctly. In particular for charsets such as
034     * UTF-16, the implementation ensures that one and only one byte order marker
035     * is produced.
036     * <p>
037     * Since in general it is not possible to predict the number of characters to be read from the
038     * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
039     * the {@link Reader} are buffered. There is therefore no well defined correlation
040     * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
041     * This also implies that in general there is no need to wrap the underlying {@link Reader}
042     * in a {@link java.io.BufferedReader}.
043     * <p>
044     * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
045     * in the following example, reading from <tt>in2</tt> would return the same byte
046     * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal
047     * with respect to the charset encoding):
048     * <pre>
049     * InputStream in = ...
050     * Charset cs = ...
051     * InputStreamReader reader = new InputStreamReader(in, cs);
052     * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
053     * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
054     * except that the control flow is reversed: both classes transform a character stream
055     * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
056     * while {@link ReaderInputStream} pulls it from the underlying stream.
057     * <p>
058     * Note that while there are use cases where there is no alternative to using
059     * this class, very often the need to use this class is an indication of a flaw
060     * in the design of the code. This class is typically used in situations where an existing
061     * API only accepts an {@link InputStream}, but where the most natural way to produce the data
062     * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
063     * where this problem may appear is when implementing the {@link javax.activation.DataSource}
064     * interface from the Java Activation Framework.
065     * <p>
066     * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
067     * read operation will block or not, it is not possible to provide a meaningful
068     * implementation of the {@link InputStream#available()} method. A call to this method
069     * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
070     * <p>
071     * Instances of {@link ReaderInputStream} are not thread safe.
072     * 
073     * @see org.apache.commons.io.output.WriterOutputStream
074     * 
075     * @author <a href="mailto:veithen@apache.org">Andreas Veithen</a>
076     * @since Commons IO 2.0
077     */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest usable capacity of the input buffer. The encoder cannot make
     * progress on a high surrogate until the matching low surrogate is also
     * in the buffer, so at least two characters must fit.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Between calls it is always in "read mode" (position..limit = pending chars).
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     * Between calls it is always in "read mode" (position..limit = pending bytes).
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent {@code encode} call, or {@code null} before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying {@link Reader} has reported end of stream. */
    private boolean endOfInput;

    /** Set once the encoder has been flushed completely; no more bytes will ever be produced. */
    private boolean encoderFinished;

    /**
     * Construct a new {@link ReaderInputStream}.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since Commons IO 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * If the encoder is configured with {@link CodingErrorAction#REPORT}, malformed or
     * unmappable input causes the read methods to throw a
     * {@link java.nio.charset.CharacterCodingException}.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters; values
     *        smaller than 2 are raised to 2 so that a surrogate pair always fits
     * @throws IllegalArgumentException if <code>bufferSize</code> is negative
     * @since Commons IO 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
        if (bufferSize < 0) {
            throw new IllegalArgumentException("Buffer size must not be negative: " + bufferSize);
        }
        this.reader = reader;
        this.encoder = encoder;
        encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        // Both buffers start out empty and in read mode.
        encoderIn.flip();
        encoderOut = ByteBuffer.allocate(128);
        encoderOut.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable
     * characters are replaced by the encoder's replacement bytes.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     * 
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Refills {@link #encoderOut} by reading more characters from the underlying
     * reader (when the encoder needs them) and running the encoder. Once the reader
     * is exhausted the encoder is flushed so that stateful charsets can emit their
     * trailing bytes. Must only be called when {@link #encoderOut} has been drained.
     *
     * @throws IOException if reading fails, or if the encoder reports malformed or
     *         unmappable input and no bytes could be produced before the error
     */
    private void fillBuffer() throws IOException {
        if (encoderFinished) {
            // The encoder has been flushed; calling encode again would be illegal.
            return;
        }
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            int position = encoderIn.position();
            // We don't use Reader#read(CharBuffer) here because it is more efficient
            // to write directly to the underlying char array (the default implementation
            // copies data to a temporary char array).
            int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
            if (c == -1) {
                endOfInput = true;
            } else {
                encoderIn.position(position + c);
            }
            encoderIn.flip();
        }
        // Switch the (drained) output buffer to write mode for the encoder.
        encoderOut.compact();
        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
        if (lastCoderResult.isError() && encoderOut.position() == 0) {
            // Nothing could be encoded before the offending input: report it instead of
            // spinning forever. Bytes produced before an error are delivered first; the
            // error then surfaces on the next refill.
            encoderOut.flip();
            lastCoderResult.throwException();
        }
        if (endOfInput && lastCoderResult.isUnderflow()) {
            // All input consumed: let the encoder write any trailing bytes (e.g. the
            // final escape sequence of ISO-2022 charsets). On overflow we simply retry
            // on the next refill, once the caller has drained the buffer.
            if (encoder.flush(encoderOut).isUnderflow()) {
                encoderFinished = true;
            }
        }
        encoderOut.flip();
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read, <code>0</code> if <code>len</code> is zero, or
     *         <code>-1</code> if the end of the stream has been reached
     * @throws NullPointerException if <code>b</code> is <code>null</code>
     * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is negative,
     *         or <code>off + len</code> exceeds the array length
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array must not be null");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("Array size=" + b.length
                    + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // required by the InputStream contract, even at end of stream
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                if (read > 0 && lastCoderResult != null && lastCoderResult.isError()) {
                    // Hand over what was encoded before the bad input; the next call
                    // will throw the encoding exception.
                    break;
                }
                fillBuffer();
                if (encoderFinished && !encoderOut.hasRemaining()) {
                    break;
                }
            }
        }
        return read == 0 ? -1 : read;
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or <code>-1</code> if the end of the stream
     *         has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            }
            fillBuffer();
            if (encoderFinished && !encoderOut.hasRemaining()) {
                return -1;
            }
        }
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}