001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.input;
018
019 import java.io.IOException;
020 import java.io.InputStream;
021 import java.io.Reader;
022 import java.nio.ByteBuffer;
023 import java.nio.CharBuffer;
024 import java.nio.charset.Charset;
025 import java.nio.charset.CharsetEncoder;
026 import java.nio.charset.CoderResult;
027 import java.nio.charset.CodingErrorAction;
028
/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
 * and transforms it to a byte stream using a specified charset encoding. The stream
 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly. In particular for charsets such as
 * UTF-16, the implementation ensures that one and only one byte order marker
 * is produced. The encoder is flushed once the {@link Reader} is exhausted, so that
 * stateful encodings can emit any trailing bytes they require.
 * <p>
 * Since in general it is not possible to predict the number of characters to be read from the
 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
 * the {@link Reader} are buffered. There is therefore no well defined correlation
 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
 * This also implies that in general there is no need to wrap the underlying {@link Reader}
 * in a {@link java.io.BufferedReader}.
 * <p>
 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
 * in the following example, reading from {@code in2} would return the same byte
 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
 * with respect to the charset encoding):
 * <pre>
 * InputStream in = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(in, cs);
 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
 * except that the control flow is reversed: both classes transform a character stream
 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
 * while {@link ReaderInputStream} pulls it from the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
 * where this problem may appear is when implementing the {@link javax.activation.DataSource}
 * interface from the Java Activation Framework.
 * <p>
 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
 * read operation will block or not, it is not possible to provide a meaningful
 * implementation of the {@link InputStream#available()} method. A call to this method
 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
 * <p>
 * Instances of {@link ReaderInputStream} are not thread safe.
 *
 * @see org.apache.commons.io.output.WriterOutputStream
 *
 * @author <a href="mailto:veithen@apache.org">Andreas Veithen</a>
 * @since Commons IO 2.0
 */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest input buffer actually allocated. Two chars are needed so that a
     * surrogate pair can be presented to the encoder as a whole; with a smaller
     * buffer the encoder would keep asking for more input while the buffer has
     * no room left to read into.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Between calls it is kept in "read mode" (flipped).
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller. Between calls it is kept in "write mode":
     * its position is the number of encoded bytes waiting to be delivered.
     */
    private final ByteBuffer encoderOut = ByteBuffer.allocate(128);

    /** Result of the most recent encode/flush call, or null before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying reader has reported end of stream. */
    private boolean endOfInput;

    /** Set once the encoder has consumed all input after end of stream; only flushing remains. */
    private boolean inputEncoded;

    /** Set once the encoder has been flushed completely; no further bytes will be produced. */
    private boolean finished;

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @since Commons IO 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder) {
        this(reader, encoder, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param encoder the charset encoder
     * @param bufferSize the size of the input buffer in number of characters; values
     *                   below 2 are rounded up to 2 so that a surrogate pair always fits
     * @throws IllegalArgumentException if <code>bufferSize</code> is negative
     * @since Commons IO 2.1
     */
    public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) {
        if (bufferSize < 0) {
            throw new IllegalArgumentException("bufferSize must not be negative: " + bufferSize);
        }
        this.reader = reader;
        this.encoder = encoder;
        encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        // Start out empty and in read mode.
        encoderIn.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream}. Malformed input and unmappable
     * characters are replaced by the encoder's replacement bytes.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        this(reader,
             charset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE),
             bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     *
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Advance the encoding pipeline by one step. Must only be called while
     * {@link #encoderOut} is empty, the stream is not finished and the last
     * coder result was not an error.
     * <p>
     * While input remains, this reads more characters from the reader (when the
     * encoder asked for more) and encodes them. Once the reader is exhausted and
     * the encoder has consumed everything, the encoder is flushed.
     *
     * @throws IOException if reading from the underlying reader fails
     */
    private void fillBuffer() throws IOException {
        if (!inputEncoded) {
            // Only pull more characters when the encoder ran out of input; after an
            // overflow it still has unprocessed characters in encoderIn.
            if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
                encoderIn.compact();
                try {
                    int position = encoderIn.position();
                    // We don't use Reader#read(CharBuffer) here because it is more efficient
                    // to write directly to the underlying char array (the default implementation
                    // copies data to a temporary char array).
                    int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
                    if (c == -1) {
                        endOfInput = true;
                    } else {
                        encoderIn.position(position + c);
                    }
                } finally {
                    // Always return to read mode, even if the reader threw, so that
                    // the buffered characters stay intact for a later call.
                    encoderIn.flip();
                }
            }
            lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
            if (!(endOfInput && lastCoderResult.isUnderflow())) {
                // More input to process, output buffer full, or an error to report.
                return;
            }
            inputEncoded = true;
        }
        // All input has been consumed: let the encoder emit any trailing bytes.
        // An overflow here means flush must be called again after draining.
        lastCoderResult = encoder.flush(encoderOut);
        if (lastCoderResult.isUnderflow()) {
            finished = true;
        }
    }

    /**
     * Read the specified number of bytes into an array. This method keeps reading
     * from the underlying {@link Reader} until <code>len</code> bytes have been
     * produced or the end of the stream is reached.
     *
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read, <code>0</code> if <code>len</code> is zero,
     *         or <code>-1</code> if the end of the stream has been reached
     * @throws NullPointerException if <code>b</code> is null
     * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is negative,
     *         or <code>len</code> is greater than <code>b.length - off</code>
     * @throws java.nio.charset.CharacterCodingException if the encoder is configured to
     *         report malformed input or unmappable characters and encounters one; bytes
     *         encoded before the offending character are delivered first
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "Array size=" + b.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            // Per the InputStream contract a zero-length read returns 0, even at EOF.
            return 0;
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.position() > 0) {
                // Hand over whatever has already been encoded.
                encoderOut.flip();
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
                encoderOut.compact();
            } else if (finished) {
                break;
            } else if (lastCoderResult != null && lastCoderResult.isError()) {
                if (read > 0) {
                    // Deliver the bytes produced so far; the error surfaces on the next call.
                    break;
                }
                lastCoderResult.throwException();
            } else {
                fillBuffer();
            }
        }
        // read is 0 here only when the stream is finished.
        return read == 0 ? -1 : read;
    }

    /**
     * Read bytes into an array, filling it completely unless the end of the
     * stream is reached first.
     *
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read (as a value in the range 0-255) or
     *         <code>-1</code> if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        byte[] b = new byte[1];
        return read(b) == -1 ? -1 : b[0] & 0xFF;
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}