001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.input;
018    
019    import java.io.IOException;
020    import java.io.InputStream;
021    import java.io.Reader;
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.Charset;
025    import java.nio.charset.CharsetEncoder;
026    import java.nio.charset.CoderResult;
027    import java.nio.charset.CodingErrorAction;
028    
029    /**
030     * {@link InputStream} implementation that reads a character stream from a {@link Reader}
031     * and transforms it to a byte stream using a specified charset encoding. The stream
032     * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
033     * encodings supported by the JRE are handled correctly. In particular for charsets such as
034     * UTF-16, the implementation ensures that one and only one byte order marker
035     * is produced.
036     * <p>
037     * Since in general it is not possible to predict the number of characters to be read from the
038     * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
039     * the {@link Reader} are buffered. There is therefore no well defined correlation
040     * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
041     * This also implies that in general there is no need to wrap the underlying {@link Reader}
042     * in a {@link java.io.BufferedReader}.
043     * <p>
044     * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
045     * in the following example, reading from <tt>in2</tt> would return the same byte
046     * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal
047     * with respect to the charset encoding):
048     * <pre>
049     * InputStream in = ...
050     * Charset cs = ...
051     * InputStreamReader reader = new InputStreamReader(in, cs);
052     * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
053     * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
054     * except that the control flow is reversed: both classes transform a character stream
055     * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
056     * while {@link ReaderInputStream} pulls it from the underlying stream.
057     * <p>
058     * Note that while there are use cases where there is no alternative to using
059     * this class, very often the need to use this class is an indication of a flaw
060     * in the design of the code. This class is typically used in situations where an existing
061     * API only accepts an {@link InputStream}, but where the most natural way to produce the data
062     * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
063     * where this problem may appear is when implementing the {@link javax.activation.DataSource}
064     * interface from the Java Activation Framework.
065     * <p>
066     * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
067     * read operation will block or not, it is not possible to provide a meaningful
068     * implementation of the {@link InputStream#available()} method. A call to this method
069     * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
070     * <p>
071     * Instances of {@link ReaderInputStream} are not thread safe.
072     * 
073     * @see org.apache.commons.io.output.WriterOutputStream
074     * 
075     * @since 2.0
076     */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Outside of {@link #fillBuffer()} it is always kept in "read" (flipped) mode.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller.
     * Outside of {@link #fillBuffer()} it is always kept in "read" (flipped) mode.
     */
    private final ByteBuffer encoderOut;

    /** Result of the most recent {@link CharsetEncoder#encode} call, or <code>null</code> before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying {@link Reader} has reported end of stream. */
    private boolean endOfInput;

    /**
     * Set once the encoder has been completely flushed. After that point the
     * encoder must not be invoked again and no further bytes will be produced.
     */
    private boolean flushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * The error actions configured on the given encoder are honoured: if the
     * encoder reports malformed or unmappable input, the read methods throw a
     * {@link java.nio.charset.CharacterCodingException}.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * The error actions configured on the given encoder are honoured: if the
     * encoder reports malformed or unmappable input, the read methods throw a
     * {@link java.nio.charset.CharacterCodingException}.
     * 
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters
     * @since 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
        this.reader = reader;
        this.encoder = encoder;
        // Both buffers start out empty and in "read" mode.
        this.encoderIn = CharBuffer.allocate(bufferSize);
        this.encoderIn.flip();
        this.encoderOut = ByteBuffer.allocate(128);
        this.encoderOut.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable
     * characters are replaced by the encoder's replacement value.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     * 
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Fills the internal char buffer from the reader (when the encoder needs
     * more input) and runs the encoder to refill the internal byte buffer.
     * Once the reader is exhausted and all characters have been encoded, the
     * encoder is flushed so that any trailing bytes are emitted.
     * 
     * @throws java.nio.charset.CharacterCodingException
     *             If the encoder reports malformed or unmappable input and
     *             all previously encoded bytes have already been consumed
     * @throws IOException
     *             If an I/O error occurs
     */
    private void fillBuffer() throws IOException {
        if (flushed) {
            // The encoder has been flushed; invoking it again would be illegal
            // and there is nothing left to produce.
            return;
        }
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            try {
                int position = encoderIn.position();
                // We don't use Reader#read(CharBuffer) here because it is more efficient
                // to write directly to the underlying char array (the default implementation
                // copies data to a temporary char array).
                int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                if (c == -1) {
                    endOfInput = true;
                } else {
                    encoderIn.position(position + c);
                }
            } finally {
                // Always return the buffer to "read" mode, even if the reader
                // failed, so that its pending content stays intact.
                encoderIn.flip();
            }
        }
        encoderOut.compact();
        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
        if (endOfInput && lastCoderResult.isUnderflow()) {
            // All input has been encoded: let the encoder emit trailing bytes.
            // If the output buffer overflows, flush is attempted again on the
            // next call, after the caller has drained the buffer.
            flushed = encoder.flush(encoderOut).isUnderflow();
        }
        encoderOut.flip();
        if (lastCoderResult.isError() && !encoderOut.hasRemaining()) {
            // Report coding errors instead of spinning forever, but only after
            // all successfully encoded bytes have been handed to the caller.
            lastCoderResult.throwException();
        }
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws NullPointerException if <code>b</code> is <code>null</code>
     * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is negative,
     *         or if <code>len</code> exceeds the space available after <code>off</code>
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array must not be null");
        }
        // "len > b.length - off" instead of "off + len > b.length" to avoid int overflow.
        if (len < 0 || off < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + b.length +
                    ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // Always return 0 if len == 0
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.hasRemaining()) {
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
            } else {
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    break;
                }
            }
        }
        return read == 0 && endOfInput ? -1 : read;
    }

    /**
     * Read the specified number of bytes into an array.
     * 
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read or <code>-1</code> if the end of the stream
     *         has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (encoderOut.hasRemaining()) {
                return encoderOut.get() & 0xFF;
            } else {
                fillBuffer();
                if (endOfInput && !encoderOut.hasRemaining()) {
                    return -1;
                }
            }
        }
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}