001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.input;
018
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
027
/**
 * {@link InputStream} implementation that reads a character stream from a {@link Reader}
 * and transforms it to a byte stream using a specified charset encoding. The stream
 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly. In particular for charsets such as
 * UTF-16, the implementation ensures that one and only one byte order marker
 * is produced.
 * <p>
 * Characters that cannot be represented in the target charset, as well as malformed
 * input such as unpaired surrogates, are replaced by the encoder's replacement byte
 * sequence (typically {@code '?'}). Once the underlying {@link Reader} is exhausted, the
 * encoder is flushed so that stateful encodings emit their closing byte sequence.
 * <p>
 * Since in general it is not possible to predict the number of characters to be read from the
 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
 * the {@link Reader} are buffered. There is therefore no well defined correlation
 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
 * This also implies that in general there is no need to wrap the underlying {@link Reader}
 * in a {@link java.io.BufferedReader}.
 * <p>
 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
 * in the following example, reading from {@code in2} would return the same byte
 * sequence as reading from {@code in} (provided that the initial byte sequence is legal
 * with respect to the charset encoding):
 * <pre>
 * InputStream in = ...
 * Charset cs = ...
 * InputStreamReader reader = new InputStreamReader(in, cs);
 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
 * except that the control flow is reversed: both classes transform a character stream
 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
 * while {@link ReaderInputStream} pulls it from the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link InputStream}, but where the most natural way to produce the data
 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
 * where this problem may appear is when implementing the {@link javax.activation.DataSource}
 * interface from the Java Activation Framework.
 * <p>
 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
 * read operation will block or not, it is not possible to provide a meaningful
 * implementation of the {@link InputStream#available()} method. A call to this method
 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
 * <p>
 * Instances of {@link ReaderInputStream} are not thread safe.
 *
 * @see org.apache.commons.io.output.WriterOutputStream
 *
 * @author <a href="mailto:veithen@apache.org">Andreas Veithen</a>
 * @since Commons IO 2.0
 */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Outside of {@link #fillBuffer()} it is always kept in "read" (flipped) mode.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller. It is always kept in "write" mode, i.e.
     * {@code position()} is the number of encoded bytes waiting to be delivered.
     */
    private final ByteBuffer encoderOut = ByteBuffer.allocate(128);

    /** Result of the most recent encode/flush call; {@code null} before the first call. */
    private CoderResult lastCoderResult;

    /** Set once the underlying reader has reported end of stream. */
    private boolean endOfInput;

    /**
     * Set once the encoder has been flushed completely. After that point the encoder
     * must not be asked to encode again, and an empty output buffer means end of stream.
     */
    private boolean encoderFlushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * The encoder is configured to replace malformed input and unmappable characters
     * instead of reporting them, so that such input cannot stall the stream.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        this.reader = reader;
        encoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        encoderIn = CharBuffer.allocate(bufferSize);
        // Start out in "read" mode with nothing to read.
        encoderIn.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     *
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     *
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     *
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Pulls more characters from the underlying reader (when the encoder has consumed
     * what it was given) and encodes them into {@link #encoderOut}. Once the reader is
     * exhausted and all characters are encoded, the encoder is flushed.
     *
     * @throws IOException if reading from the underlying reader fails or the encoder
     *                     reports an error
     */
    private void fillBuffer() throws IOException {
        // Only read more characters if the encoder ran out of input; after an
        // overflow it still has pending characters in encoderIn.
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            int position = encoderIn.position();
            // We don't use Reader#read(CharBuffer) here because it is more efficient
            // to write directly to the underlying char array (the default implementation
            // copies data to a temporary char array).
            int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
            if (c == -1) {
                endOfInput = true;
            } else {
                encoderIn.position(position + c);
            }
            encoderIn.flip();
        }
        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
        if (lastCoderResult.isError()) {
            // Not expected with REPLACE actions, but never spin on an error result.
            lastCoderResult.throwException();
        }
        if (endOfInput && lastCoderResult.isUnderflow()) {
            // All characters are encoded: let the encoder emit any trailing bytes.
            // On overflow the flush is retried on the next call, after the caller
            // has drained encoderOut.
            lastCoderResult = encoder.flush(encoderOut);
            if (lastCoderResult.isUnderflow()) {
                encoderFlushed = true;
            }
        }
    }

    /**
     * Read the specified number of bytes into an array. Unless the end of the
     * stream is reached first, this method keeps reading from the underlying
     * reader until {@code len} bytes have been stored.
     *
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read (0 if {@code len} is 0) or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
     *         {@code len} is greater than {@code b.length - off}
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array must not be null");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "Array size=" + b.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            // InputStream contract: a zero-length read always returns 0, even at EOF.
            return 0;
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.position() > 0) {
                // Hand over already encoded bytes first.
                encoderOut.flip();
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
                encoderOut.compact();
            } else if (encoderFlushed) {
                // Nothing buffered and the encoder is done: end of stream.
                break;
            } else {
                fillBuffer();
            }
        }
        return read == 0 && encoderFlushed ? -1 : read;
    }

    /**
     * Read the specified number of bytes into an array.
     *
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read (as a value in the range 0 to 255) or
     *         <code>-1</code> if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        byte[] b = new byte[1];
        return read(b) == -1 ? -1 : b[0] & 0xFF;
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}