View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.apache.commons.io.IOUtils.EOF;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.Reader;
24  import java.nio.ByteBuffer;
25  import java.nio.CharBuffer;
26  import java.nio.charset.Charset;
27  import java.nio.charset.CharsetEncoder;
28  import java.nio.charset.CoderResult;
29  import java.nio.charset.CodingErrorAction;
30  import java.util.Objects;
31  
32  /**
33   * {@link InputStream} implementation that reads a character stream from a {@link Reader}
34   * and transforms it to a byte stream using a specified charset encoding. The stream
35   * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
36   * encodings supported by the JRE are handled correctly. In particular for charsets such as
37   * UTF-16, the implementation ensures that one and only one byte order marker
38   * is produced.
39   * <p>
40   * Since in general it is not possible to predict the number of characters to be read from the
41   * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
42   * the {@link Reader} are buffered. There is therefore no well defined correlation
43   * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
44   * This also implies that in general there is no need to wrap the underlying {@link Reader}
45   * in a {@link java.io.BufferedReader}.
46   * <p>
47   * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
48   * in the following example, reading from {@code in2} would return the same byte
49   * sequence as reading from {@code in} (provided that the initial byte sequence is legal
50   * with respect to the charset encoding):
51   * <pre>
52   * InputStream inputStream = ...
53   * Charset cs = ...
54   * InputStreamReader reader = new InputStreamReader(inputStream, cs);
55   * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
56   *
57   * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
58   * except that the control flow is reversed: both classes transform a character stream
59   * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
60   * while {@link ReaderInputStream} pulls it from the underlying stream.
61   * <p>
62   * Note that while there are use cases where there is no alternative to using
63   * this class, very often the need to use this class is an indication of a flaw
64   * in the design of the code. This class is typically used in situations where an existing
65   * API only accepts an {@link InputStream}, but where the most natural way to produce the data
66   * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
67   * where this problem may appear is when implementing the {@code javax.activation.DataSource}
68   * interface from the Java Activation Framework.
69   * <p>
70   * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
71   * read operation will block or not, it is not possible to provide a meaningful
72   * implementation of the {@link InputStream#available()} method. A call to this method
73   * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
74   * </p>
75   * <p>
76   * Instances of {@link ReaderInputStream} are not thread safe.
77   * </p>
78   *
79   * @see org.apache.commons.io.output.WriterOutputStream
80   *
81   * @since 2.0
82   */
83  public class ReaderInputStream extends InputStream {
84      private static final int DEFAULT_BUFFER_SIZE = 1024;
85  
86      private final Reader reader;
87      private final CharsetEncoder encoder;
88  
89      /**
90       * CharBuffer used as input for the decoder. It should be reasonably
91       * large as we read data from the underlying Reader into this buffer.
92       */
93      private final CharBuffer encoderIn;
94  
95      /**
96       * ByteBuffer used as output for the decoder. This buffer can be small
97       * as it is only used to transfer data from the decoder to the
98       * buffer provided by the caller.
99       */
100     private final ByteBuffer encoderOut;
101 
102     private CoderResult lastCoderResult;
103     private boolean endOfInput;
104 
105     /**
106      * Construct a new {@link ReaderInputStream}.
107      *
108      * @param reader the target {@link Reader}
109      * @param encoder the charset encoder
110      * @since 2.1
111      */
112     public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) {
113         this(reader, encoder, DEFAULT_BUFFER_SIZE);
114     }
115 
116     /**
117      * Construct a new {@link ReaderInputStream}.
118      *
119      * @param reader the target {@link Reader}
120      * @param encoder the charset encoder
121      * @param bufferSize the size of the input buffer in number of characters
122      * @since 2.1
123      */
124     public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) {
125         this.reader = reader;
126         this.encoder = encoder;
127         this.encoderIn = CharBuffer.allocate(bufferSize);
128         this.encoderIn.flip();
129         this.encoderOut = ByteBuffer.allocate(128);
130         this.encoderOut.flip();
131     }
132 
133     /**
134      * Construct a new {@link ReaderInputStream}.
135      *
136      * @param reader the target {@link Reader}
137      * @param charset the charset encoding
138      * @param bufferSize the size of the input buffer in number of characters
139      */
140     public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
141         this(reader,
142              charset.newEncoder()
143                     .onMalformedInput(CodingErrorAction.REPLACE)
144                     .onUnmappableCharacter(CodingErrorAction.REPLACE),
145              bufferSize);
146     }
147 
148     /**
149      * Construct a new {@link ReaderInputStream} with a default input buffer size of
150      * {@value #DEFAULT_BUFFER_SIZE} characters.
151      *
152      * @param reader the target {@link Reader}
153      * @param charset the charset encoding
154      */
155     public ReaderInputStream(final Reader reader, final Charset charset) {
156         this(reader, charset, DEFAULT_BUFFER_SIZE);
157     }
158 
159     /**
160      * Construct a new {@link ReaderInputStream}.
161      *
162      * @param reader the target {@link Reader}
163      * @param charsetName the name of the charset encoding
164      * @param bufferSize the size of the input buffer in number of characters
165      */
166     public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) {
167         this(reader, Charset.forName(charsetName), bufferSize);
168     }
169 
170     /**
171      * Construct a new {@link ReaderInputStream} with a default input buffer size of
172      * {@value #DEFAULT_BUFFER_SIZE} characters.
173      *
174      * @param reader the target {@link Reader}
175      * @param charsetName the name of the charset encoding
176      */
177     public ReaderInputStream(final Reader reader, final String charsetName) {
178         this(reader, charsetName, DEFAULT_BUFFER_SIZE);
179     }
180 
181     /**
182      * Construct a new {@link ReaderInputStream} that uses the default character encoding
183      * with a default input buffer size of {@value #DEFAULT_BUFFER_SIZE} characters.
184      *
185      * @param reader the target {@link Reader}
186      * @deprecated 2.5 use {@link #ReaderInputStream(Reader, Charset)} instead
187      */
188     @Deprecated
189     public ReaderInputStream(final Reader reader) {
190         this(reader, Charset.defaultCharset());
191     }
192 
193     /**
194      * Fills the internal char buffer from the reader.
195      *
196      * @throws IOException
197      *             If an I/O error occurs
198      */
199     private void fillBuffer() throws IOException {
200         if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
201             encoderIn.compact();
202             final int position = encoderIn.position();
203             // We don't use Reader#read(CharBuffer) here because it is more efficient
204             // to write directly to the underlying char array (the default implementation
205             // copies data to a temporary char array).
206             final int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
207             if (c == EOF) {
208                 endOfInput = true;
209             } else {
210                 encoderIn.position(position+c);
211             }
212             encoderIn.flip();
213         }
214         encoderOut.compact();
215         lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
216         encoderOut.flip();
217     }
218 
219     /**
220      * Read the specified number of bytes into an array.
221      *
222      * @param array the byte array to read into
223      * @param off the offset to start reading bytes into
224      * @param len the number of bytes to read
225      * @return the number of bytes read or {@code -1}
226      *         if the end of the stream has been reached
227      * @throws IOException if an I/O error occurs.
228      */
229     @Override
230     public int read(final byte[] array, int off, int len) throws IOException {
231         Objects.requireNonNull(array, "array");
232         if (len < 0 || off < 0 || (off + len) > array.length) {
233             throw new IndexOutOfBoundsException("Array Size=" + array.length +
234                     ", offset=" + off + ", length=" + len);
235         }
236         int read = 0;
237         if (len == 0) {
238             return 0; // Always return 0 if len == 0
239         }
240         while (len > 0) {
241             if (encoderOut.hasRemaining()) {
242                 final int c = Math.min(encoderOut.remaining(), len);
243                 encoderOut.get(array, off, c);
244                 off += c;
245                 len -= c;
246                 read += c;
247             } else {
248                 fillBuffer();
249                 if (endOfInput && !encoderOut.hasRemaining()) {
250                     break;
251                 }
252             }
253         }
254         return read == 0 && endOfInput ? EOF : read;
255     }
256 
257     /**
258      * Read the specified number of bytes into an array.
259      *
260      * @param b the byte array to read into
261      * @return the number of bytes read or {@code -1}
262      *         if the end of the stream has been reached
263      * @throws IOException if an I/O error occurs.
264      */
265     @Override
266     public int read(final byte[] b) throws IOException {
267         return read(b, 0, b.length);
268     }
269 
270     /**
271      * Read a single byte.
272      *
273      * @return either the byte read or {@code -1} if the end of the stream
274      *         has been reached
275      * @throws IOException if an I/O error occurs.
276      */
277     @Override
278     public int read() throws IOException {
279         for (;;) {
280             if (encoderOut.hasRemaining()) {
281                 return encoderOut.get() & 0xFF;
282             }
283             fillBuffer();
284             if (endOfInput && !encoderOut.hasRemaining()) {
285                 return EOF;
286             }
287         }
288     }
289 
290     /**
291      * Close the stream. This method will cause the underlying {@link Reader}
292      * to be closed.
293      * @throws IOException if an I/O error occurs.
294      */
295     @Override
296     public void close() throws IOException {
297         reader.close();
298     }
299 }