001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import static org.apache.commons.io.IOUtils.EOF; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.Reader; 024import java.nio.ByteBuffer; 025import java.nio.CharBuffer; 026import java.nio.charset.Charset; 027import java.nio.charset.CharsetEncoder; 028import java.nio.charset.CoderResult; 029import java.nio.charset.CodingErrorAction; 030 031/** 032 * {@link InputStream} implementation that reads a character stream from a {@link Reader} 033 * and transforms it to a byte stream using a specified charset encoding. The stream 034 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset 035 * encodings supported by the JRE are handled correctly. In particular for charsets such as 036 * UTF-16, the implementation ensures that one and only one byte order marker 037 * is produced. 038 * <p> 039 * Since in general it is not possible to predict the number of characters to be read from the 040 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from 041 * the {@link Reader} are buffered. There is therefore no well defined correlation 042 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}. 043 * This also implies that in general there is no need to wrap the underlying {@link Reader} 044 * in a {@link java.io.BufferedReader}. 045 * <p> 046 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; 047 * in the following example, reading from {@code in2} would return the same byte 048 * sequence as reading from {@code in} (provided that the initial byte sequence is legal 049 * with respect to the charset encoding): 050 * <pre> 051 * InputStream in = ... 052 * Charset cs = ... 053 * InputStreamReader reader = new InputStreamReader(in, cs); 054 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre> 055 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, 056 * except that the control flow is reversed: both classes transform a character stream 057 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, 058 * while {@link ReaderInputStream} pulls it from the underlying stream. 059 * <p> 060 * Note that while there are use cases where there is no alternative to using 061 * this class, very often the need to use this class is an indication of a flaw 062 * in the design of the code. This class is typically used in situations where an existing 063 * API only accepts an {@link InputStream}, but where the most natural way to produce the data 064 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation 065 * where this problem may appear is when implementing the {@link javax.activation.DataSource} 066 * interface from the Java Activation Framework. 067 * <p> 068 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next 069 * read operation will block or not, it is not possible to provide a meaningful 070 * implementation of the {@link InputStream#available()} method. A call to this method 071 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}. 072 * <p> 073 * Instances of {@link ReaderInputStream} are not thread safe. 074 * 075 * @see org.apache.commons.io.output.WriterOutputStream 076 * 077 * @since 2.0 078 */ 079public class ReaderInputStream extends InputStream { 080 private static final int DEFAULT_BUFFER_SIZE = 1024; 081 082 private final Reader reader; 083 private final CharsetEncoder encoder; 084 085 /** 086 * CharBuffer used as input for the decoder. It should be reasonably 087 * large as we read data from the underlying Reader into this buffer. 088 */ 089 private final CharBuffer encoderIn; 090 091 /** 092 * ByteBuffer used as output for the decoder. This buffer can be small 093 * as it is only used to transfer data from the decoder to the 094 * buffer provided by the caller. 095 */ 096 private final ByteBuffer encoderOut; 097 098 private CoderResult lastCoderResult; 099 private boolean endOfInput; 100 101 /** 102 * Construct a new {@link ReaderInputStream}. 103 * 104 * @param reader the target {@link Reader} 105 * @param encoder the charset encoder 106 * @since 2.1 107 */ 108 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder) { 109 this(reader, encoder, DEFAULT_BUFFER_SIZE); 110 } 111 112 /** 113 * Construct a new {@link ReaderInputStream}. 114 * 115 * @param reader the target {@link Reader} 116 * @param encoder the charset encoder 117 * @param bufferSize the size of the input buffer in number of characters 118 * @since 2.1 119 */ 120 public ReaderInputStream(final Reader reader, final CharsetEncoder encoder, final int bufferSize) { 121 this.reader = reader; 122 this.encoder = encoder; 123 this.encoderIn = CharBuffer.allocate(bufferSize); 124 this.encoderIn.flip(); 125 this.encoderOut = ByteBuffer.allocate(128); 126 this.encoderOut.flip(); 127 } 128 129 /** 130 * Construct a new {@link ReaderInputStream}. 131 * 132 * @param reader the target {@link Reader} 133 * @param charset the charset encoding 134 * @param bufferSize the size of the input buffer in number of characters 135 */ 136 public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) { 137 this(reader, 138 charset.newEncoder() 139 .onMalformedInput(CodingErrorAction.REPLACE) 140 .onUnmappableCharacter(CodingErrorAction.REPLACE), 141 bufferSize); 142 } 143 144 /** 145 * Construct a new {@link ReaderInputStream} with a default input buffer size of 146 * 1024 characters. 147 * 148 * @param reader the target {@link Reader} 149 * @param charset the charset encoding 150 */ 151 public ReaderInputStream(final Reader reader, final Charset charset) { 152 this(reader, charset, DEFAULT_BUFFER_SIZE); 153 } 154 155 /** 156 * Construct a new {@link ReaderInputStream}. 157 * 158 * @param reader the target {@link Reader} 159 * @param charsetName the name of the charset encoding 160 * @param bufferSize the size of the input buffer in number of characters 161 */ 162 public ReaderInputStream(final Reader reader, final String charsetName, final int bufferSize) { 163 this(reader, Charset.forName(charsetName), bufferSize); 164 } 165 166 /** 167 * Construct a new {@link ReaderInputStream} with a default input buffer size of 168 * 1024 characters. 169 * 170 * @param reader the target {@link Reader} 171 * @param charsetName the name of the charset encoding 172 */ 173 public ReaderInputStream(final Reader reader, final String charsetName) { 174 this(reader, charsetName, DEFAULT_BUFFER_SIZE); 175 } 176 177 /** 178 * Construct a new {@link ReaderInputStream} that uses the default character encoding 179 * with a default input buffer size of 1024 characters. 180 * 181 * @param reader the target {@link Reader} 182 * @deprecated 2.5 use {@link #ReaderInputStream(Reader, Charset)} instead 183 */ 184 @Deprecated 185 public ReaderInputStream(final Reader reader) { 186 this(reader, Charset.defaultCharset()); 187 } 188 189 /** 190 * Fills the internal char buffer from the reader. 191 * 192 * @throws IOException 193 * If an I/O error occurs 194 */ 195 private void fillBuffer() throws IOException { 196 if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { 197 encoderIn.compact(); 198 final int position = encoderIn.position(); 199 // We don't use Reader#read(CharBuffer) here because it is more efficient 200 // to write directly to the underlying char array (the default implementation 201 // copies data to a temporary char array). 202 final int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); 203 if (c == EOF) { 204 endOfInput = true; 205 } else { 206 encoderIn.position(position+c); 207 } 208 encoderIn.flip(); 209 } 210 encoderOut.compact(); 211 lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); 212 encoderOut.flip(); 213 } 214 215 /** 216 * Read the specified number of bytes into an array. 217 * 218 * @param b the byte array to read into 219 * @param off the offset to start reading bytes into 220 * @param len the number of bytes to read 221 * @return the number of bytes read or <code>-1</code> 222 * if the end of the stream has been reached 223 * @throws IOException if an I/O error occurs 224 */ 225 @Override 226 public int read(final byte[] b, int off, int len) throws IOException { 227 if (b == null) { 228 throw new NullPointerException("Byte array must not be null"); 229 } 230 if (len < 0 || off < 0 || (off + len) > b.length) { 231 throw new IndexOutOfBoundsException("Array Size=" + b.length + 232 ", offset=" + off + ", length=" + len); 233 } 234 int read = 0; 235 if (len == 0) { 236 return 0; // Always return 0 if len == 0 237 } 238 while (len > 0) { 239 if (encoderOut.hasRemaining()) { 240 final int c = Math.min(encoderOut.remaining(), len); 241 encoderOut.get(b, off, c); 242 off += c; 243 len -= c; 244 read += c; 245 } else { 246 fillBuffer(); 247 if (endOfInput && !encoderOut.hasRemaining()) { 248 break; 249 } 250 } 251 } 252 return read == 0 && endOfInput ? EOF : read; 253 } 254 255 /** 256 * Read the specified number of bytes into an array. 257 * 258 * @param b the byte array to read into 259 * @return the number of bytes read or <code>-1</code> 260 * if the end of the stream has been reached 261 * @throws IOException if an I/O error occurs 262 */ 263 @Override 264 public int read(final byte[] b) throws IOException { 265 return read(b, 0, b.length); 266 } 267 268 /** 269 * Read a single byte. 270 * 271 * @return either the byte read or <code>-1</code> if the end of the stream 272 * has been reached 273 * @throws IOException if an I/O error occurs 274 */ 275 @Override 276 public int read() throws IOException { 277 for (;;) { 278 if (encoderOut.hasRemaining()) { 279 return encoderOut.get() & 0xFF; 280 } 281 fillBuffer(); 282 if (endOfInput && !encoderOut.hasRemaining()) { 283 return EOF; 284 } 285 } 286 } 287 288 /** 289 * Close the stream. This method will cause the underlying {@link Reader} 290 * to be closed. 291 * @throws IOException if an I/O error occurs 292 */ 293 @Override 294 public void close() throws IOException { 295 reader.close(); 296 } 297}